Merge tag 'metag-v3.9-rc1-v4' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 3 Mar 2013 20:06:09 +0000 (12:06 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 3 Mar 2013 20:06:09 +0000 (12:06 -0800)
Pull new ImgTec Meta architecture from James Hogan:
 "This adds core architecture support for Imagination's Meta processor
  cores, followed by some later miscellaneous arch/metag cleanups and
  fixes which I kept separate to ease review:

   - Support for basic Meta 1 (ATP) and Meta 2 (HTP) core architecture
   - A few fixes all over, particularly for symbol prefixes
   - A few privilege protection fixes
   - Several cleanups (setup.c includes, split out a lot of
     metag_ksyms.c)
   - Fix some missing exports
   - Convert hugetlb to use vm_unmapped_area()
   - Copy device tree to non-init memory
   - Provide dma_get_sgtable()"

* tag 'metag-v3.9-rc1-v4' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/metag: (61 commits)
  metag: Provide dma_get_sgtable()
  metag: prom.h: remove declaration of metag_dt_memblock_reserve()
  metag: copy devicetree to non-init memory
  metag: cleanup metag_ksyms.c includes
  metag: move mm/init.c exports out of metag_ksyms.c
  metag: move usercopy.c exports out of metag_ksyms.c
  metag: move setup.c exports out of metag_ksyms.c
  metag: move kick.c exports out of metag_ksyms.c
  metag: move traps.c exports out of metag_ksyms.c
  metag: move irq enable out of irqflags.h on SMP
  genksyms: fix metag symbol prefix on crc symbols
  metag: hugetlb: convert to vm_unmapped_area()
  metag: export clear_page and copy_page
  metag: export metag_code_cache_flush_all
  metag: protect more non-MMU memory regions
  metag: make TXPRIVEXT bits explicit
  metag: kernel/setup.c: sort includes
  perf: Enable building perf tools for Meta
  metag: add boot time LNKGET/LNKSET check
  metag: add __init to metag_cache_probe()
  ...

200 files changed:
Documentation/00-INDEX
Documentation/devicetree/bindings/metag/meta-intc.txt [new file with mode: 0644]
Documentation/kernel-parameters.txt
Documentation/metag/00-INDEX [new file with mode: 0644]
Documentation/metag/kernel-ABI.txt [new file with mode: 0644]
MAINTAINERS
arch/Kconfig
arch/metag/Kconfig [new file with mode: 0644]
arch/metag/Kconfig.debug [new file with mode: 0644]
arch/metag/Kconfig.soc [new file with mode: 0644]
arch/metag/Makefile [new file with mode: 0644]
arch/metag/boot/.gitignore [new file with mode: 0644]
arch/metag/boot/Makefile [new file with mode: 0644]
arch/metag/boot/dts/Makefile [new file with mode: 0644]
arch/metag/boot/dts/skeleton.dts [new file with mode: 0644]
arch/metag/boot/dts/skeleton.dtsi [new file with mode: 0644]
arch/metag/configs/meta1_defconfig [new file with mode: 0644]
arch/metag/configs/meta2_defconfig [new file with mode: 0644]
arch/metag/configs/meta2_smp_defconfig [new file with mode: 0644]
arch/metag/include/asm/Kbuild [new file with mode: 0644]
arch/metag/include/asm/atomic.h [new file with mode: 0644]
arch/metag/include/asm/atomic_lnkget.h [new file with mode: 0644]
arch/metag/include/asm/atomic_lock1.h [new file with mode: 0644]
arch/metag/include/asm/barrier.h [new file with mode: 0644]
arch/metag/include/asm/bitops.h [new file with mode: 0644]
arch/metag/include/asm/bug.h [new file with mode: 0644]
arch/metag/include/asm/cache.h [new file with mode: 0644]
arch/metag/include/asm/cacheflush.h [new file with mode: 0644]
arch/metag/include/asm/cachepart.h [new file with mode: 0644]
arch/metag/include/asm/checksum.h [new file with mode: 0644]
arch/metag/include/asm/clock.h [new file with mode: 0644]
arch/metag/include/asm/cmpxchg.h [new file with mode: 0644]
arch/metag/include/asm/cmpxchg_irq.h [new file with mode: 0644]
arch/metag/include/asm/cmpxchg_lnkget.h [new file with mode: 0644]
arch/metag/include/asm/cmpxchg_lock1.h [new file with mode: 0644]
arch/metag/include/asm/core_reg.h [new file with mode: 0644]
arch/metag/include/asm/cpu.h [new file with mode: 0644]
arch/metag/include/asm/da.h [new file with mode: 0644]
arch/metag/include/asm/delay.h [new file with mode: 0644]
arch/metag/include/asm/div64.h [new file with mode: 0644]
arch/metag/include/asm/dma-mapping.h [new file with mode: 0644]
arch/metag/include/asm/elf.h [new file with mode: 0644]
arch/metag/include/asm/fixmap.h [new file with mode: 0644]
arch/metag/include/asm/ftrace.h [new file with mode: 0644]
arch/metag/include/asm/global_lock.h [new file with mode: 0644]
arch/metag/include/asm/gpio.h [new file with mode: 0644]
arch/metag/include/asm/highmem.h [new file with mode: 0644]
arch/metag/include/asm/hugetlb.h [new file with mode: 0644]
arch/metag/include/asm/hwthread.h [new file with mode: 0644]
arch/metag/include/asm/io.h [new file with mode: 0644]
arch/metag/include/asm/irq.h [new file with mode: 0644]
arch/metag/include/asm/irqflags.h [new file with mode: 0644]
arch/metag/include/asm/l2cache.h [new file with mode: 0644]
arch/metag/include/asm/linkage.h [new file with mode: 0644]
arch/metag/include/asm/mach/arch.h [new file with mode: 0644]
arch/metag/include/asm/metag_isa.h [new file with mode: 0644]
arch/metag/include/asm/metag_mem.h [new file with mode: 0644]
arch/metag/include/asm/metag_regs.h [new file with mode: 0644]
arch/metag/include/asm/mman.h [new file with mode: 0644]
arch/metag/include/asm/mmu.h [new file with mode: 0644]
arch/metag/include/asm/mmu_context.h [new file with mode: 0644]
arch/metag/include/asm/mmzone.h [new file with mode: 0644]
arch/metag/include/asm/module.h [new file with mode: 0644]
arch/metag/include/asm/page.h [new file with mode: 0644]
arch/metag/include/asm/perf_event.h [new file with mode: 0644]
arch/metag/include/asm/pgalloc.h [new file with mode: 0644]
arch/metag/include/asm/pgtable.h [new file with mode: 0644]
arch/metag/include/asm/processor.h [new file with mode: 0644]
arch/metag/include/asm/prom.h [new file with mode: 0644]
arch/metag/include/asm/ptrace.h [new file with mode: 0644]
arch/metag/include/asm/setup.h [new file with mode: 0644]
arch/metag/include/asm/smp.h [new file with mode: 0644]
arch/metag/include/asm/sparsemem.h [new file with mode: 0644]
arch/metag/include/asm/spinlock.h [new file with mode: 0644]
arch/metag/include/asm/spinlock_lnkget.h [new file with mode: 0644]
arch/metag/include/asm/spinlock_lock1.h [new file with mode: 0644]
arch/metag/include/asm/spinlock_types.h [new file with mode: 0644]
arch/metag/include/asm/stacktrace.h [new file with mode: 0644]
arch/metag/include/asm/string.h [new file with mode: 0644]
arch/metag/include/asm/switch.h [new file with mode: 0644]
arch/metag/include/asm/syscall.h [new file with mode: 0644]
arch/metag/include/asm/syscalls.h [new file with mode: 0644]
arch/metag/include/asm/tbx.h [new file with mode: 0644]
arch/metag/include/asm/tcm.h [new file with mode: 0644]
arch/metag/include/asm/thread_info.h [new file with mode: 0644]
arch/metag/include/asm/tlb.h [new file with mode: 0644]
arch/metag/include/asm/tlbflush.h [new file with mode: 0644]
arch/metag/include/asm/topology.h [new file with mode: 0644]
arch/metag/include/asm/traps.h [new file with mode: 0644]
arch/metag/include/asm/uaccess.h [new file with mode: 0644]
arch/metag/include/asm/unistd.h [new file with mode: 0644]
arch/metag/include/asm/user_gateway.h [new file with mode: 0644]
arch/metag/include/uapi/asm/Kbuild [new file with mode: 0644]
arch/metag/include/uapi/asm/byteorder.h [new file with mode: 0644]
arch/metag/include/uapi/asm/ptrace.h [new file with mode: 0644]
arch/metag/include/uapi/asm/resource.h [new file with mode: 0644]
arch/metag/include/uapi/asm/sigcontext.h [new file with mode: 0644]
arch/metag/include/uapi/asm/siginfo.h [new file with mode: 0644]
arch/metag/include/uapi/asm/swab.h [new file with mode: 0644]
arch/metag/include/uapi/asm/unistd.h [new file with mode: 0644]
arch/metag/kernel/.gitignore [new file with mode: 0644]
arch/metag/kernel/Makefile [new file with mode: 0644]
arch/metag/kernel/asm-offsets.c [new file with mode: 0644]
arch/metag/kernel/cachepart.c [new file with mode: 0644]
arch/metag/kernel/clock.c [new file with mode: 0644]
arch/metag/kernel/core_reg.c [new file with mode: 0644]
arch/metag/kernel/da.c [new file with mode: 0644]
arch/metag/kernel/devtree.c [new file with mode: 0644]
arch/metag/kernel/dma.c [new file with mode: 0644]
arch/metag/kernel/ftrace.c [new file with mode: 0644]
arch/metag/kernel/ftrace_stub.S [new file with mode: 0644]
arch/metag/kernel/head.S [new file with mode: 0644]
arch/metag/kernel/irq.c [new file with mode: 0644]
arch/metag/kernel/kick.c [new file with mode: 0644]
arch/metag/kernel/machines.c [new file with mode: 0644]
arch/metag/kernel/metag_ksyms.c [new file with mode: 0644]
arch/metag/kernel/module.c [new file with mode: 0644]
arch/metag/kernel/perf/Makefile [new file with mode: 0644]
arch/metag/kernel/perf/perf_event.c [new file with mode: 0644]
arch/metag/kernel/perf/perf_event.h [new file with mode: 0644]
arch/metag/kernel/perf_callchain.c [new file with mode: 0644]
arch/metag/kernel/process.c [new file with mode: 0644]
arch/metag/kernel/ptrace.c [new file with mode: 0644]
arch/metag/kernel/setup.c [new file with mode: 0644]
arch/metag/kernel/signal.c [new file with mode: 0644]
arch/metag/kernel/smp.c [new file with mode: 0644]
arch/metag/kernel/stacktrace.c [new file with mode: 0644]
arch/metag/kernel/sys_metag.c [new file with mode: 0644]
arch/metag/kernel/tbiunexp.S [new file with mode: 0644]
arch/metag/kernel/tcm.c [new file with mode: 0644]
arch/metag/kernel/time.c [new file with mode: 0644]
arch/metag/kernel/topology.c [new file with mode: 0644]
arch/metag/kernel/traps.c [new file with mode: 0644]
arch/metag/kernel/user_gateway.S [new file with mode: 0644]
arch/metag/kernel/vmlinux.lds.S [new file with mode: 0644]
arch/metag/lib/Makefile [new file with mode: 0644]
arch/metag/lib/ashldi3.S [new file with mode: 0644]
arch/metag/lib/ashrdi3.S [new file with mode: 0644]
arch/metag/lib/checksum.c [new file with mode: 0644]
arch/metag/lib/clear_page.S [new file with mode: 0644]
arch/metag/lib/cmpdi2.S [new file with mode: 0644]
arch/metag/lib/copy_page.S [new file with mode: 0644]
arch/metag/lib/delay.c [new file with mode: 0644]
arch/metag/lib/div64.S [new file with mode: 0644]
arch/metag/lib/divsi3.S [new file with mode: 0644]
arch/metag/lib/ip_fast_csum.S [new file with mode: 0644]
arch/metag/lib/lshrdi3.S [new file with mode: 0644]
arch/metag/lib/memcpy.S [new file with mode: 0644]
arch/metag/lib/memmove.S [new file with mode: 0644]
arch/metag/lib/memset.S [new file with mode: 0644]
arch/metag/lib/modsi3.S [new file with mode: 0644]
arch/metag/lib/muldi3.S [new file with mode: 0644]
arch/metag/lib/ucmpdi2.S [new file with mode: 0644]
arch/metag/lib/usercopy.c [new file with mode: 0644]
arch/metag/mm/Kconfig [new file with mode: 0644]
arch/metag/mm/Makefile [new file with mode: 0644]
arch/metag/mm/cache.c [new file with mode: 0644]
arch/metag/mm/extable.c [new file with mode: 0644]
arch/metag/mm/fault.c [new file with mode: 0644]
arch/metag/mm/highmem.c [new file with mode: 0644]
arch/metag/mm/hugetlbpage.c [new file with mode: 0644]
arch/metag/mm/init.c [new file with mode: 0644]
arch/metag/mm/ioremap.c [new file with mode: 0644]
arch/metag/mm/l2cache.c [new file with mode: 0644]
arch/metag/mm/maccess.c [new file with mode: 0644]
arch/metag/mm/mmu-meta1.c [new file with mode: 0644]
arch/metag/mm/mmu-meta2.c [new file with mode: 0644]
arch/metag/mm/numa.c [new file with mode: 0644]
arch/metag/tbx/Makefile [new file with mode: 0644]
arch/metag/tbx/tbicore.S [new file with mode: 0644]
arch/metag/tbx/tbictx.S [new file with mode: 0644]
arch/metag/tbx/tbictxfpu.S [new file with mode: 0644]
arch/metag/tbx/tbidefr.S [new file with mode: 0644]
arch/metag/tbx/tbidspram.S [new file with mode: 0644]
arch/metag/tbx/tbilogf.S [new file with mode: 0644]
arch/metag/tbx/tbipcx.S [new file with mode: 0644]
arch/metag/tbx/tbiroot.S [new file with mode: 0644]
arch/metag/tbx/tbisoft.S [new file with mode: 0644]
arch/metag/tbx/tbistring.c [new file with mode: 0644]
arch/metag/tbx/tbitimer.S [new file with mode: 0644]
drivers/clocksource/Kconfig
drivers/clocksource/Makefile
drivers/clocksource/metag_generic.c [new file with mode: 0644]
drivers/irqchip/Makefile
drivers/irqchip/irq-metag-ext.c [new file with mode: 0644]
drivers/irqchip/irq-metag.c [new file with mode: 0644]
fs/binfmt_elf.c
include/asm-generic/io.h
include/asm-generic/unistd.h
include/clocksource/metag_generic.h [new file with mode: 0644]
include/linux/irqchip/metag-ext.h [new file with mode: 0644]
include/linux/irqchip/metag.h [new file with mode: 0644]
include/linux/mm.h
include/uapi/linux/elf.h
kernel/trace/ring_buffer.c
lib/Kconfig.debug
scripts/checkstack.pl
scripts/genksyms/genksyms.c
scripts/recordmcount.c
tools/perf/perf.h

index 0f3e8bbab8d79c5fddfb173f1bb76adf82b008f5..45b3df936d2fbc0ba30f798264cff33273a969c0 100644 (file)
@@ -299,6 +299,8 @@ memory-hotplug.txt
        - Hotpluggable memory support, how to use and current status.
 memory.txt
        - info on typical Linux memory problems.
+metag/
+       - directory with info about Linux on Meta architecture.
 mips/
        - directory with info about Linux on MIPS architecture.
 misc-devices/
diff --git a/Documentation/devicetree/bindings/metag/meta-intc.txt b/Documentation/devicetree/bindings/metag/meta-intc.txt
new file mode 100644 (file)
index 0000000..8c47dcb
--- /dev/null
@@ -0,0 +1,82 @@
+* Meta External Trigger Controller Binding
+
+This binding specifies what properties must be available in the device tree
+representation of a Meta external trigger controller.
+
+Required properties:
+
+    - compatible: Specifies the compatibility list for the interrupt controller.
+      The type shall be <string> and the value shall include "img,meta-intc".
+
+    - num-banks: Specifies the number of interrupt banks (each of which can
+      handle 32 interrupt sources).
+
+    - interrupt-controller: The presence of this property identifies the node
+      as an interrupt controller. No property value shall be defined.
+
+    - #interrupt-cells: Specifies the number of cells needed to encode an
+      interrupt source. The type shall be a <u32> and the value shall be 2.
+
+    - #address-cells: Specifies the number of cells needed to encode an
+      address. The type shall be <u32> and the value shall be 0. As such,
+      'interrupt-map' nodes do not have to specify a parent unit address.
+
+Optional properties:
+
+    - no-mask: The controller doesn't have any mask registers.
+
+* Interrupt Specifier Definition
+
+  Interrupt specifiers consist of 2 cells encoded as follows:
+
+    - <1st-cell>: The interrupt-number that identifies the interrupt source.
+
+    - <2nd-cell>: The Linux interrupt flags containing level-sense information,
+                  encoded as follows:
+                    1 = edge triggered
+                    4 = level-sensitive
+
+* Examples
+
+Example 1:
+
+       /*
+        * Meta external trigger block
+        */
+       intc: intc {
+               // This is an interrupt controller node.
+               interrupt-controller;
+
+               // No address cells so that 'interrupt-map' nodes which
+               // reference this interrupt controller node do not need a parent
+               // address specifier.
+               #address-cells = <0>;
+
+               // Two cells to encode interrupt sources.
+               #interrupt-cells = <2>;
+
+               // Number of interrupt banks
+               num-banks = <2>;
+
+               // No HWMASKEXT is available (specify on Chorus2 and Comet ES1)
+               no-mask;
+
+               // Compatible with Meta hardware trigger block.
+               compatible = "img,meta-intc";
+       };
+
+Example 2:
+
+       /*
+        * An interrupt generating device that is wired to a Meta external
+        * trigger block.
+        */
+       uart1: uart@0x02004c00 {
+               // Interrupt source '5' that is level-sensitive.
+               // Note that there are only two cells as specified in the
+               // interrupt parent's '#interrupt-cells' property.
+               interrupts = <5 4 /* level */>;
+
+               // The interrupt controller that this device is wired to.
+               interrupt-parent = <&intc>;
+       };
index 3a54fca730c0ce2bccdb73f847e58b1876ee34c2..4609e81dbc37fc2dbfa005ff607890df3a8bbc6b 100644 (file)
@@ -978,6 +978,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                               If specified, z/VM IUCV HVC accepts connections
                               from listed z/VM user IDs only.
 
+       hwthread_map=   [METAG] Comma-separated list of Linux cpu id to
+                               hardware thread id mappings.
+                               Format: <cpu>:<hwthread>
+
        keep_bootcon    [KNL]
                        Do not unregister boot console at start. This is only
                        useful for debugging when something happens in the window
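
As an illustration of the hwthread_map= format documented above (the values here are hypothetical, not taken from the patch), a kernel command line mapping Linux CPU 0 to hardware thread 1 and Linux CPU 1 to hardware thread 2 would include:

        hwthread_map=0:1,1:2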
diff --git a/Documentation/metag/00-INDEX b/Documentation/metag/00-INDEX
new file mode 100644 (file)
index 0000000..db11c51
--- /dev/null
@@ -0,0 +1,4 @@
+00-INDEX
+       - this file
+kernel-ABI.txt
+       - Documents metag ABI details
diff --git a/Documentation/metag/kernel-ABI.txt b/Documentation/metag/kernel-ABI.txt
new file mode 100644 (file)
index 0000000..7b8dee8
--- /dev/null
@@ -0,0 +1,256 @@
+                       ==========================
+                       KERNEL ABIS FOR METAG ARCH
+                       ==========================
+
+This document describes the Linux ABIs for the metag architecture, and has the
+following sections:
+
+ (*) Outline of registers
+ (*) Userland registers
+ (*) Kernel registers
+ (*) System call ABI
+ (*) Calling conventions
+
+
+====================
+OUTLINE OF REGISTERS
+====================
+
+The main Meta core registers are arranged in units:
+
+       UNIT    Type    DESCRIPTION     GP      EXT     PRIV    GLOBAL
+       ======= ======= =============== ======= ======= ======= =======
+       CT      Special Control unit
+       D0      General Data unit 0     0-7     8-15    16-31   16-31
+       D1      General Data unit 1     0-7     8-15    16-31   16-31
+       A0      General Address unit 0  0-3     4-7      8-15    8-15
+       A1      General Address unit 1  0-3     4-7      8-15    8-15
+       PC      Special PC unit         0                1
+       PORT    Special Ports
+       TR      Special Trigger unit                     0-7
+       TT      Special Trace unit                       0-5
+       FX      General FP unit                 0-15
+
+GP registers form part of the main context.
+
+Extended context registers (EXT) may not be present on all hardware threads and
+can be context switched if support is enabled and the appropriate bits are set
+in e.g. the D0.8 register to indicate what extended state to preserve.
+
+Global registers are shared between threads and are privilege protected.
+
+See arch/metag/include/asm/metag_regs.h for definitions relating to core
+registers and the fields and bits they contain. See the TRMs for further details
+about special registers.
+
+Several special registers are preserved in the main context; these are the
+interesting ones:
+
+       REG     (ALIAS)         PURPOSE
+       ======================= ===============================================
+       CT.1    (TXMODE)        Processor mode bits (particularly for DSP)
+       CT.2    (TXSTATUS)      Condition flags and LSM_STEP (MGET/MSET step)
+       CT.3    (TXRPT)         Branch repeat counter
+       PC.0    (PC)            Program counter
+
+Some of the general registers have special purposes in the ABI and therefore
+have aliases:
+
+       D0 REG  (ALIAS) PURPOSE         D1 REG  (ALIAS) PURPOSE
+       =============== =============== =============== =======================
+       D0.0    (D0Re0) 32bit result    D1.0    (D1Re0) Top half of 64bit result
+       D0.1    (D0Ar6) Argument 6      D1.1    (D1Ar5) Argument 5
+       D0.2    (D0Ar4) Argument 4      D1.2    (D1Ar3) Argument 3
+       D0.3    (D0Ar2) Argument 2      D1.3    (D1Ar1) Argument 1
+       D0.4    (D0FrT) Frame temp      D1.4    (D1RtP) Return pointer
+       D0.5            Call preserved  D1.5            Call preserved
+       D0.6            Call preserved  D1.6            Call preserved
+       D0.7            Call preserved  D1.7            Call preserved
+
+       A0 REG  (ALIAS) PURPOSE         A1 REG  (ALIAS) PURPOSE
+       =============== =============== =============== =======================
+       A0.0    (A0StP) Stack pointer   A1.0    (A1GbP) Global base pointer
+       A0.1    (A0FrP) Frame pointer   A1.1    (A1LbP) Local base pointer
+       A0.2                            A1.2
+       A0.3                            A1.3
+
+
+==================
+USERLAND REGISTERS
+==================
+
+All the general purpose D0, D1, A0, A1 registers are preserved when entering the
+kernel (including asynchronous events such as interrupts and timer ticks) except
+the following which have special purposes in the ABI:
+
+       REGISTERS       WHEN    STATUS          PURPOSE
+       =============== ======= =============== ===============================
+       D0.8            DSP     Preserved       ECH, determines what extended
+                                               DSP state to preserve.
+       A0.0    (A0StP) ALWAYS  Preserved       Stack >= A0StP may be clobbered
+                                               at any time by the creation of a
+                                               signal frame.
+       A1.0    (A1GbP) SMP     Clobbered       Used as temporary for loading
+                                               kernel stack pointer and saving
+                                               core context.
+       A0.15           !SMP    Protected       Stores kernel stack pointer.
+       A1.15           ALWAYS  Protected       Stores kernel base pointer.
+
+On UP A0.15 is used to store the kernel stack pointer for storing the userland
+context. A0.15 is global between hardware threads, though, which means it cannot
+be used on SMP for this purpose. Since no protected local registers are
+available A1GbP is reserved for use as a temporary to allow a percpu stack
+pointer to be loaded for storing the rest of the context.
+
+
+================
+KERNEL REGISTERS
+================
+
+When in the kernel the following registers have special purposes in the ABI:
+
+       REGISTERS       WHEN    STATUS          PURPOSE
+       =============== ======= =============== ===============================
+       A0.0    (A0StP) ALWAYS  Preserved       Stack >= A0StP may be clobbered
+                                               at any time by the creation of
+                                               an irq signal frame.
+       A1.0    (A1GbP) ALWAYS  Preserved       Reserved (kernel base pointer).
+
+
+===============
+SYSTEM CALL ABI
+===============
+
+When a system call is made, the following registers are effective:
+
+       REGISTERS       CALL                    RETURN
+       =============== ======================= ===============================
+       D0.0    (D0Re0)                         Return value (or -errno)
+       D1.0    (D1Re0) System call number      Clobbered
+       D0.1    (D0Ar6) Syscall arg #6          Preserved
+       D1.1    (D1Ar5) Syscall arg #5          Preserved
+       D0.2    (D0Ar4) Syscall arg #4          Preserved
+       D1.2    (D1Ar3) Syscall arg #3          Preserved
+       D0.3    (D0Ar2) Syscall arg #2          Preserved
+       D1.3    (D1Ar1) Syscall arg #1          Preserved
+
+Due to the limited number of argument registers and some system calls with badly
+aligned 64-bit arguments, 64-bit values are always packed in consecutive
+arguments, even if this is contrary to the normal calling conventions (where the
+two halves would go in a matching pair of data registers).
+
+For example fadvise64_64 usually has the signature:
+
+       long sys_fadvise64_64(i32 fd, i64 offs, i64 len, i32 advice);
+
+But for metag fadvise64_64 is wrapped so that the 64-bit arguments are packed:
+
+       long sys_fadvise64_64_metag(i32 fd,      i32 offs_lo,
+                                   i32 offs_hi, i32 len_lo,
+                                   i32 len_hi,  i32 advice)
+
+So the arguments are packed in the registers like this:
+
+       D0 REG  (ALIAS) VALUE           D1 REG  (ALIAS) VALUE
+       =============== =============== =============== =======================
+       D0.1    (D0Ar6) advice          D1.1    (D1Ar5) hi(len)
+       D0.2    (D0Ar4) lo(len)         D1.2    (D1Ar3) hi(offs)
+       D0.3    (D0Ar2) lo(offs)        D1.3    (D1Ar1) fd
+
+
+===================
+CALLING CONVENTIONS
+===================
+
+These calling conventions apply to both user and kernel code. The stack grows
+from low addresses to high addresses in the metag ABI. The stack pointer (A0StP)
+should always point to the next free address on the stack and should at all
+times be 64-bit aligned. The following registers are effective at the point of a
+call:
+
+       REGISTERS       CALL                    RETURN
+       =============== ======================= ===============================
+       D0.0    (D0Re0)                         32bit return value
+       D1.0    (D1Re0)                         Upper half of 64bit return value
+       D0.1    (D0Ar6) 32bit argument #6       Clobbered
+       D1.1    (D1Ar5) 32bit argument #5       Clobbered
+       D0.2    (D0Ar4) 32bit argument #4       Clobbered
+       D1.2    (D1Ar3) 32bit argument #3       Clobbered
+       D0.3    (D0Ar2) 32bit argument #2       Clobbered
+       D1.3    (D1Ar1) 32bit argument #1       Clobbered
+       D0.4    (D0FrT)                         Clobbered
+       D1.4    (D1RtP) Return pointer          Clobbered
+       D{0-1}.{5-7}                            Preserved
+       A0.0    (A0StP) Stack pointer           Preserved
+       A1.0    (A1GbP)                         Preserved
+       A0.1    (A0FrP) Frame pointer           Preserved
+       A1.1    (A1LbP)                         Preserved
+       A{0-1}.{2-3}                            Clobbered
+
+64-bit arguments are placed in matching pairs of registers (i.e. the same
+register number in both D0 and D1 units), with the least significant half in D0
+and the most significant half in D1, leaving a gap where necessary. Further
+arguments are stored on the stack in reverse order (earlier arguments at higher
+addresses):
+
+       ADDRESS         0     1     2     3     4     5     6     7
+       =============== ===== ===== ===== ===== ===== ===== ===== =====
+       A0StP       -->
+       A0StP-0x08      32bit argument #8       32bit argument #7
+       A0StP-0x10      32bit argument #10      32bit argument #9
+
+Function prologues tend to look a bit like this:
+
+       /* If frame pointer in use, move it to frame temp register so it can be
+          easily pushed onto stack */
+       MOV     D0FrT,A0FrP
+
+       /* If frame pointer in use, set it to stack pointer */
+       ADD     A0FrP,A0StP,#0
+
+       /* Preserve D0FrT, D1RtP, D{0-1}.{5-7} on stack, incrementing A0StP */
+       MSETL   [A0StP++],D0FrT,D0.5,D0.6,D0.7
+
+       /* Allocate some stack space for local variables */
+       ADD     A0StP,A0StP,#0x10
+
+At this point the stack would look like this:
+
+       ADDRESS         0     1     2     3     4     5     6     7
+       =============== ===== ===== ===== ===== ===== ===== ===== =====
+       A0StP       -->
+       A0StP-0x08
+       A0StP-0x10
+       A0StP-0x18      Old D0.7                Old D1.7
+       A0StP-0x20      Old D0.6                Old D1.6
+       A0StP-0x28      Old D0.5                Old D1.5
+       A0FrP       --> Old A0FrP (frame ptr)   Old D1RtP (return ptr)
+       A0FrP-0x08      32bit argument #8       32bit argument #7
+       A0FrP-0x10      32bit argument #10      32bit argument #9
+
+Function epilogues tend to differ depending on the use of a frame pointer. An
+example of a frame pointer epilogue:
+
+       /* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack, incrementing A0FrP */
+       MGETL   D0FrT,D0.5,D0.6,D0.7,[A0FrP++]
+       /* Restore stack pointer to where frame pointer was before increment */
+       SUB     A0StP,A0FrP,#0x20
+       /* Restore frame pointer from frame temp */
+       MOV     A0FrP,D0FrT
+       /* Return to caller via restored return pointer */
+       MOV     PC,D1RtP
+
+If the function hasn't touched the frame pointer, MGETL cannot be safely used
+with A0StP as it always increments and that would expose the stack to clobbering
+by interrupts (kernel) or signals (user). Therefore it's common to see the MGETL
+split into separate GETL instructions:
+
+       /* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack */
+       GETL    D0FrT,D1RtP,[A0StP+#-0x30]
+       GETL    D0.5,D1.5,[A0StP+#-0x28]
+       GETL    D0.6,D1.6,[A0StP+#-0x20]
+       GETL    D0.7,D1.7,[A0StP+#-0x18]
+       /* Restore stack pointer */
+       SUB     A0StP,A0StP,#0x30
+       /* Return to caller via restored return pointer */
+       MOV     PC,D1RtP
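
The System Call ABI section above describes how 64-bit syscall arguments are passed as packed 32-bit halves. A minimal C sketch of how a kernel-side wrapper could recombine such halves is shown below; it is illustrative only, assumes the generic sys_fadvise64_64() implementation, and is not taken from the patch (the wrapper actually added by this series lives in arch/metag/kernel/sys_metag.c and may differ in detail):

	#include <linux/syscalls.h>
	#include <linux/types.h>

	/*
	 * Illustrative sketch: rebuild the 64-bit offs/len values from the
	 * packed 32-bit halves described in the System Call ABI above, then
	 * hand off to the generic implementation.
	 */
	asmlinkage long sys_fadvise64_64_metag(int fd, u32 offs_lo, u32 offs_hi,
					       u32 len_lo, u32 len_hi, int advice)
	{
		u64 offs = ((u64)offs_hi << 32) | offs_lo;
		u64 len  = ((u64)len_hi  << 32) | len_lo;

		return sys_fadvise64_64(fd, offs, len, advice);
	}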
index aea0adf414dc17118b79f632afb93edf9ad5f5b5..e95b1e944eb7d26e19b712af2ffa3788d47be2ec 100644 (file)
@@ -5204,6 +5204,18 @@ F:       drivers/mtd/
 F:     include/linux/mtd/
 F:     include/uapi/mtd/
 
+METAG ARCHITECTURE
+M:     James Hogan <james.hogan@imgtec.com>
+S:     Supported
+F:     arch/metag/
+F:     Documentation/metag/
+F:     Documentation/devicetree/bindings/metag/
+F:     drivers/clocksource/metag_generic.c
+F:     drivers/irqchip/irq-metag.c
+F:     drivers/irqchip/irq-metag-ext.c
+F:     drivers/tty/metag_da.c
+F:     fs/imgdafs/
+
 MICROBLAZE ARCHITECTURE
 M:     Michal Simek <monstr@monstr.eu>
 L:     microblaze-uclinux@itee.uq.edu.au (moderated for non-subscribers)
index dcd91a85536a631057cd7cabec89583b63e1f468..5a1779c93940153b30ba84a561b75aaec1e109ea 100644 (file)
@@ -103,6 +103,22 @@ config UPROBES
 
          If in doubt, say "N".
 
+config HAVE_64BIT_ALIGNED_ACCESS
+       def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS
+       help
+         Some architectures require 64 bit accesses to be 64 bit
+         aligned, which also requires structs containing 64 bit values
+         to be 64 bit aligned too. This includes some 32 bit
+         architectures which can do 64 bit accesses, as well as 64 bit
+         architectures without unaligned access.
+
+         This symbol should be selected by an architecture if 64 bit
+         accesses are required to be 64 bit aligned in this way even
+         though it is not a 64 bit architecture.
+
+         See Documentation/unaligned-memory-access.txt for more
+         information on the topic of unaligned memory accesses.
+
 config HAVE_EFFICIENT_UNALIGNED_ACCESS
        bool
        help
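
The HAVE_64BIT_ALIGNED_ACCESS help text above concerns generic code that lays out 64-bit fields by hand (kernel/trace/ring_buffer.c, also touched by this merge, is one such user). A short C sketch of the constraint, using hypothetical names, follows:

	#include <linux/types.h>
	#include <linux/compiler.h>

	/*
	 * Illustrative sketch only: when HAVE_64BIT_ALIGNED_ACCESS is set,
	 * 64-bit loads/stores must hit 8-byte-aligned addresses, so 64-bit
	 * struct fields need explicit padding/alignment.
	 */
	struct sample_record {
		u32 type;
		u32 pad;		/* keep the next field 8-byte aligned */
		u64 ts __aligned(8);	/* accessed with 64-bit loads/stores */
	};

	static u8 sample_buf[128] __aligned(8);	/* backing storage, same alignment */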
diff --git a/arch/metag/Kconfig b/arch/metag/Kconfig
new file mode 100644 (file)
index 0000000..afc8973
--- /dev/null
@@ -0,0 +1,290 @@
+config SYMBOL_PREFIX
+       string
+       default "_"
+
+config METAG
+       def_bool y
+       select EMBEDDED
+       select GENERIC_ATOMIC64
+       select GENERIC_CLOCKEVENTS
+       select GENERIC_IRQ_SHOW
+       select GENERIC_SMP_IDLE_THREAD
+       select HAVE_64BIT_ALIGNED_ACCESS
+       select HAVE_ARCH_TRACEHOOK
+       select HAVE_C_RECORDMCOUNT
+       select HAVE_DEBUG_KMEMLEAK
+       select HAVE_DYNAMIC_FTRACE
+       select HAVE_FTRACE_MCOUNT_RECORD
+       select HAVE_FUNCTION_TRACER
+       select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+       select HAVE_GENERIC_HARDIRQS
+       select HAVE_KERNEL_BZIP2
+       select HAVE_KERNEL_GZIP
+       select HAVE_KERNEL_LZO
+       select HAVE_KERNEL_XZ
+       select HAVE_MEMBLOCK
+       select HAVE_MEMBLOCK_NODE_MAP
+       select HAVE_MOD_ARCH_SPECIFIC
+       select HAVE_PERF_EVENTS
+       select HAVE_SYSCALL_TRACEPOINTS
+       select IRQ_DOMAIN
+       select MODULES_USE_ELF_RELA
+       select OF
+       select OF_EARLY_FLATTREE
+       select SPARSE_IRQ
+
+config STACKTRACE_SUPPORT
+       def_bool y
+
+config LOCKDEP_SUPPORT
+       def_bool y
+
+config HAVE_LATENCYTOP_SUPPORT
+       def_bool y
+
+config RWSEM_GENERIC_SPINLOCK
+       def_bool y
+
+config RWSEM_XCHGADD_ALGORITHM
+       bool
+
+config GENERIC_HWEIGHT
+       def_bool y
+
+config GENERIC_CALIBRATE_DELAY
+       def_bool y
+
+config GENERIC_GPIO
+       def_bool n
+
+config NO_IOPORT
+       def_bool y
+
+source "init/Kconfig"
+
+source "kernel/Kconfig.freezer"
+
+menu "Processor type and features"
+
+config MMU
+       def_bool y
+
+config STACK_GROWSUP
+       def_bool y
+
+config HOTPLUG_CPU
+       bool "Enable CPU hotplug support"
+       depends on SMP
+       help
+         Say Y here to allow turning CPUs off and on. CPUs can be
+         controlled through /sys/devices/system/cpu.
+
+         Say N if you want to disable CPU hotplug.
+
+config HIGHMEM
+       bool "High Memory Support"
+       help
+         The address space of Meta processors is only 4 Gigabytes large
+         and it has to accommodate user address space, kernel address
+         space as well as some memory mapped IO. That means that, if you
+         have a large amount of physical memory and/or IO, not all of the
+         memory can be "permanently mapped" by the kernel. The physical
+         memory that is not permanently mapped is called "high memory".
+
+         Depending on the selected kernel/user memory split, minimum
+         vmalloc space and actual amount of RAM, you may not need this
+         option which should result in a slightly faster kernel.
+
+         If unsure, say n.
+
+source "arch/metag/mm/Kconfig"
+
+source "arch/metag/Kconfig.soc"
+
+config METAG_META12
+       bool
+       help
+         Select this from the SoC config symbol to indicate that it contains a
+         Meta 1.2 core.
+
+config METAG_META21
+       bool
+       help
+         Select this from the SoC config symbol to indicate that it contains a
+         Meta 2.1 core.
+
+config SMP
+       bool "Symmetric multi-processing support"
+       depends on METAG_META21 && METAG_META21_MMU
+       select USE_GENERIC_SMP_HELPERS
+       help
+         This enables support for systems with more than one thread running
+         Linux. If you have a system with only one thread running Linux,
+         say N. Otherwise, say Y.
+
+config NR_CPUS
+       int "Maximum number of CPUs (2-4)" if SMP
+       range 2 4 if SMP
+       default "1" if !SMP
+       default "4" if SMP
+
+config METAG_SMP_WRITE_REORDERING
+       bool
+       help
+         This attempts to prevent cache-memory incoherence due to external
+         reordering of writes from different hardware threads when SMP is
+         enabled. It adds fences (system event 0) to smp_mb and smp_rmb in an
+         attempt to catch some of the cases, and also before writes to shared
+         memory in LOCK1 protected atomics and spinlocks.
+         This will not completely prevent cache incoherency on affected cores.
+
+config METAG_LNKGET_AROUND_CACHE
+       bool
+       depends on METAG_META21
+       help
+         This indicates that the LNKGET/LNKSET instructions go around the
+         cache, which requires some extra cache flushes when the memory needs
+         to be accessed by normal GET/SET instructions too.
+
+choice
+       prompt "Atomicity primitive"
+       default METAG_ATOMICITY_LNKGET
+       help
+         This option selects the mechanism for performing atomic operations.
+
+config METAG_ATOMICITY_IRQSOFF
+       depends on !SMP
+       bool "irqsoff"
+       help
+         This option disables interrupts to achieve atomicity. This mechanism
+         is not SMP-safe.
+
+config METAG_ATOMICITY_LNKGET
+       depends on METAG_META21
+       bool "lnkget/lnkset"
+       help
+         This option uses the LNKGET and LNKSET instructions to achieve
+         atomicity. LNKGET/LNKSET are load-link/store-conditional instructions.
+         Choose this option if your system requires low latency.
+
+config METAG_ATOMICITY_LOCK1
+       depends on SMP
+       bool "lock1"
+       help
+         This option uses the LOCK1 instruction for atomicity. This is mainly
+         provided as a debugging aid if the lnkget/lnkset atomicity primitive
+         isn't working properly.
+
+endchoice
+
+config METAG_FPU
+       bool "FPU Support"
+       depends on METAG_META21
+       default y
+       help
+         This option allows processes to use FPU hardware available with this
+         CPU. If this option is not enabled FPU registers will not be saved
+         and restored on context-switch.
+
+         If you plan on running programs which are compiled to use hard floats
+         say Y here.
+
+config METAG_DSP
+       bool "DSP Support"
+       help
+         This option allows processes to use DSP hardware available
+         with this CPU. If this option is not enabled DSP registers
+         will not be saved and restored on context-switch.
+
+         If you plan on running DSP programs say Y here.
+
+config METAG_PERFCOUNTER_IRQS
+       bool "PerfCounters interrupt support"
+       depends on METAG_META21
+       help
+         This option enables using interrupts to collect information from
+         Performance Counters. This option is supported in new META21
+         (starting from HTP265).
+
+         When disabled, Performance Counters information will be collected
+         based on Timer Interrupt.
+
+config METAG_DA
+       bool "DA support"
+       help
+         Say Y if you plan to use a DA debug adapter with Linux. The presence
+         of the DA will be detected automatically at boot, so it is safe to say
+         Y to this option even when booting without a DA.
+
+         This enables support for services provided by DA JTAG debug adapters,
+         such as:
+         - communication over DA channels (such as the console driver).
+         - use of the DA filesystem.
+
+menu "Boot options"
+
+config METAG_BUILTIN_DTB
+       bool "Embed DTB in kernel image"
+       default y
+       help
+         Embeds a device tree binary in the kernel image.
+
+config METAG_BUILTIN_DTB_NAME
+       string "Built in DTB"
+       depends on METAG_BUILTIN_DTB
+       help
+         Set the name of the DTB to embed (leave blank to pick one
+         automatically based on kernel configuration).
+
+config CMDLINE_BOOL
+       bool "Default bootloader kernel arguments"
+
+config CMDLINE
+       string "Kernel command line"
+       depends on CMDLINE_BOOL
+       help
+         On some architectures there is currently no way for the boot loader
+         to pass arguments to the kernel. For these architectures, you should
+         supply some command-line options at build time by entering them
+         here.
+
+config CMDLINE_FORCE
+       bool "Force default kernel command string"
+       depends on CMDLINE_BOOL
+       help
+         Set this to have arguments from the default kernel command string
+         override those passed by the boot loader.
+
+endmenu
+
+source "kernel/Kconfig.preempt"
+
+source kernel/Kconfig.hz
+
+endmenu
+
+menu "Power management options"
+
+source kernel/power/Kconfig
+
+endmenu
+
+menu "Executable file formats"
+
+source "fs/Kconfig.binfmt"
+
+endmenu
+
+source "net/Kconfig"
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/metag/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
diff --git a/arch/metag/Kconfig.debug b/arch/metag/Kconfig.debug
new file mode 100644 (file)
index 0000000..e45bbf6
--- /dev/null
@@ -0,0 +1,40 @@
+menu "Kernel hacking"
+
+config TRACE_IRQFLAGS_SUPPORT
+       bool
+       default y
+
+source "lib/Kconfig.debug"
+
+config DEBUG_STACKOVERFLOW
+       bool "Check for stack overflows"
+       depends on DEBUG_KERNEL
+       help
+         This option will cause messages to be printed if free stack space
+         drops below a certain limit.
+
+config 4KSTACKS
+       bool "Use 4Kb for kernel stacks instead of 8Kb"
+       depends on DEBUG_KERNEL
+       help
+         If you say Y here the kernel will use a 4Kb stacksize for the
+         kernel stack attached to each process/thread. This facilitates
+         running more threads on a system and also reduces the pressure
+         on the VM subsystem for higher order allocations. This option
+         will also use IRQ stacks to compensate for the reduced stackspace.
+
+config METAG_FUNCTION_TRACE
+       bool "Output Meta real-time trace data for function entry/exit"
+       help
+         If you say Y here the kernel will use the Meta hardware trace
+         unit to output information about function entry and exit that
+         can be used by a debugger for profiling and call-graphs.
+
+config METAG_POISON_CATCH_BUFFERS
+       bool "Poison catch buffer contents on kernel entry"
+       help
+         If you say Y here the kernel will write poison data to the
+         catch buffer registers on kernel entry. This will make any
+         problem with catch buffer handling much more apparent.
+
+endmenu
diff --git a/arch/metag/Kconfig.soc b/arch/metag/Kconfig.soc
new file mode 100644 (file)
index 0000000..ec079cf
--- /dev/null
@@ -0,0 +1,55 @@
+choice
+       prompt "SoC Type"
+       default META21_FPGA
+
+config META12_FPGA
+       bool "Meta 1.2 FPGA"
+       select METAG_META12
+       help
+         This is a Meta 1.2 FPGA bitstream, just a bare CPU.
+
+config META21_FPGA
+       bool "Meta 2.1 FPGA"
+       select METAG_META21
+       help
+         This is a Meta 2.1 FPGA bitstream, just a bare CPU.
+
+endchoice
+
+menu "SoC configuration"
+
+if METAG_META21
+
+# Meta 2.x specific options
+
+config METAG_META21_MMU
+       bool "Meta 2.x MMU mode"
+       default y
+       help
+         Use the Meta 2.x MMU in extended mode.
+
+config METAG_UNALIGNED
+       bool "Meta 2.x unaligned access checking"
+       default y
+       help
+         All memory accesses will be checked for alignment and an exception
+         raised on unaligned accesses. This feature does cost performance
+         but without it there will be no notification of this type of error.
+
+config METAG_USER_TCM
+       bool "Meta on-chip memory support for userland"
+       select GENERIC_ALLOCATOR
+       default y
+       help
+         Allow the on-chip memories of Meta SoCs to be used by user
+         applications.
+
+endif
+
+config METAG_HALT_ON_PANIC
+       bool "Halt the core on panic"
+       help
+         Halt the core when a panic occurs. This is useful when running
+         pre-production silicon or in an FPGA environment.
+
+endmenu
diff --git a/arch/metag/Makefile b/arch/metag/Makefile
new file mode 100644 (file)
index 0000000..81bd6a1
--- /dev/null
@@ -0,0 +1,87 @@
+#
+# metag/Makefile
+#
+# This file is included by the global makefile so that you can add your own
+# architecture-specific flags and dependencies. Remember to include actions
+# for "archclean" to clean up for this architecture.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 1994 by Linus Torvalds
+#               2007,2008,2012 by Imagination Technologies Ltd.
+#
+
+LDFLAGS                                        :=
+OBJCOPYFLAGS                           := -O binary -R .note -R .comment -S
+
+checkflags-$(CONFIG_METAG_META12)      += -DMETAC_1_2
+checkflags-$(CONFIG_METAG_META21)      += -DMETAC_2_1
+CHECKFLAGS                             += -D__metag__ $(checkflags-y)
+
+KBUILD_DEFCONFIG                       := meta2_defconfig
+
+sflags-$(CONFIG_METAG_META12)          += -mmetac=1.2
+ifeq ($(CONFIG_METAG_META12),y)
+# Only use TBI API 1.4 if DSP is enabled for META12 cores
+sflags-$(CONFIG_METAG_DSP)             += -DTBI_1_4
+endif
+sflags-$(CONFIG_METAG_META21)          += -mmetac=2.1 -DTBI_1_4
+
+cflags-$(CONFIG_METAG_FUNCTION_TRACE)  += -mhwtrace-leaf -mhwtrace-retpc
+cflags-$(CONFIG_METAG_META21)          += -mextensions=bex
+
+KBUILD_CFLAGS                          += -pipe
+KBUILD_CFLAGS                          += -ffunction-sections
+
+KBUILD_CFLAGS                          += $(sflags-y) $(cflags-y)
+KBUILD_AFLAGS                          += $(sflags-y)
+
+LDFLAGS_vmlinux                                := $(ldflags-y)
+
+head-y                                 := arch/metag/kernel/head.o
+
+core-y                                 += arch/metag/boot/dts/
+core-y                                 += arch/metag/kernel/
+core-y                                 += arch/metag/mm/
+
+libs-y                                 += arch/metag/lib/
+libs-y                                 += arch/metag/tbx/
+
+boot                                   := arch/metag/boot
+
+boot_targets                           += uImage
+boot_targets                           += uImage.gz
+boot_targets                           += uImage.bz2
+boot_targets                           += uImage.xz
+boot_targets                           += uImage.lzo
+boot_targets                           += uImage.bin
+boot_targets                           += vmlinux.bin
+
+PHONY                                  += $(boot_targets)
+
+all: vmlinux.bin
+
+$(boot_targets): vmlinux
+       $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+%.dtb %.dtb.S %.dtb.o: scripts
+       $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
+
+dtbs: scripts
+       $(Q)$(MAKE) $(build)=$(boot)/dts dtbs
+
+archclean:
+       $(Q)$(MAKE) $(clean)=$(boot)
+
+define archhelp
+  echo  '* vmlinux.bin - Binary kernel image (arch/$(ARCH)/boot/vmlinux.bin)'
+  @echo '  uImage      - Alias to bootable U-Boot image'
+  @echo '  uImage.bin  - Kernel-only image for U-Boot (bin)'
+  @echo '  uImage.gz   - Kernel-only image for U-Boot (gzip)'
+  @echo '  uImage.bz2  - Kernel-only image for U-Boot (bzip2)'
+  @echo '  uImage.xz   - Kernel-only image for U-Boot (xz)'
+  @echo '  uImage.lzo  - Kernel-only image for U-Boot (lzo)'
+  @echo '  dtbs                - Build device tree blobs for enabled boards'
+endef
diff --git a/arch/metag/boot/.gitignore b/arch/metag/boot/.gitignore
new file mode 100644 (file)
index 0000000..a021da2
--- /dev/null
@@ -0,0 +1,4 @@
+vmlinux*
+uImage*
+ramdisk.*
+*.dtb
diff --git a/arch/metag/boot/Makefile b/arch/metag/boot/Makefile
new file mode 100644 (file)
index 0000000..5a1f88c
--- /dev/null
@@ -0,0 +1,68 @@
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2007,2012  Imagination Technologies Ltd.
+#
+
+suffix-y := bin
+suffix-$(CONFIG_KERNEL_GZIP)   := gz
+suffix-$(CONFIG_KERNEL_BZIP2)  := bz2
+suffix-$(CONFIG_KERNEL_XZ)     := xz
+suffix-$(CONFIG_KERNEL_LZO)    := lzo
+
+targets += vmlinux.bin
+targets += uImage
+targets += uImage.gz
+targets += uImage.bz2
+targets += uImage.xz
+targets += uImage.lzo
+targets += uImage.bin
+
+extra-y += vmlinux.bin
+extra-y += vmlinux.bin.gz
+extra-y += vmlinux.bin.bz2
+extra-y += vmlinux.bin.xz
+extra-y += vmlinux.bin.lzo
+
+UIMAGE_LOADADDR = $(CONFIG_PAGE_OFFSET)
+
+ifeq ($(CONFIG_FUNCTION_TRACER),y)
+orig_cflags := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst -pg, , $(orig_cflags))
+endif
+
+$(obj)/vmlinux.bin: vmlinux FORCE
+       $(call if_changed,objcopy)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,gzip)
+
+$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,bzip2)
+
+$(obj)/vmlinux.bin.xz: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,xzkern)
+
+$(obj)/vmlinux.bin.lzo: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,lzo)
+
+$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
+       $(call if_changed,uimage,gzip)
+
+$(obj)/uImage.bz2: $(obj)/vmlinux.bin.bz2 FORCE
+       $(call if_changed,uimage,bzip2)
+
+$(obj)/uImage.xz: $(obj)/vmlinux.bin.xz FORCE
+       $(call if_changed,uimage,xz)
+
+$(obj)/uImage.lzo: $(obj)/vmlinux.bin.lzo FORCE
+       $(call if_changed,uimage,lzo)
+
+$(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,uimage,none)
+
+$(obj)/uImage: $(obj)/uImage.$(suffix-y)
+       @ln -sf $(notdir $<) $@
+       @echo '  Image $@ is ready'
diff --git a/arch/metag/boot/dts/Makefile b/arch/metag/boot/dts/Makefile
new file mode 100644 (file)
index 0000000..e0b5afd
--- /dev/null
@@ -0,0 +1,16 @@
+dtb-y  += skeleton.dtb
+
+# Built-in dtb
+builtindtb-y                           := skeleton
+
+ifneq ($(CONFIG_METAG_BUILTIN_DTB_NAME),"")
+       builtindtb-y                    := $(CONFIG_METAG_BUILTIN_DTB_NAME)
+endif
+obj-$(CONFIG_METAG_BUILTIN_DTB)        += $(patsubst "%",%,$(builtindtb-y)).dtb.o
+
+targets        += dtbs
+targets        += $(dtb-y)
+
+dtbs: $(addprefix $(obj)/, $(dtb-y))
+
+clean-files += *.dtb
diff --git a/arch/metag/boot/dts/skeleton.dts b/arch/metag/boot/dts/skeleton.dts
new file mode 100644 (file)
index 0000000..7244d1f
--- /dev/null
@@ -0,0 +1,10 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+/dts-v1/;
+
+/include/ "skeleton.dtsi"
diff --git a/arch/metag/boot/dts/skeleton.dtsi b/arch/metag/boot/dts/skeleton.dtsi
new file mode 100644 (file)
index 0000000..78229ea
--- /dev/null
@@ -0,0 +1,14 @@
+/*
+ * Skeleton device tree; the bare minimum needed to boot; just include and
+ * add a compatible value.  The bootloader will typically populate the memory
+ * node.
+ */
+
+/ {
+       compatible = "img,meta";
+       #address-cells = <1>;
+       #size-cells = <1>;
+       chosen { };
+       aliases { };
+       memory { device_type = "memory"; reg = <0 0>; };
+};
diff --git a/arch/metag/configs/meta1_defconfig b/arch/metag/configs/meta1_defconfig
new file mode 100644 (file)
index 0000000..c35a75e
--- /dev/null
@@ -0,0 +1,40 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_META12_FPGA=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/configs/meta2_defconfig b/arch/metag/configs/meta2_defconfig
new file mode 100644 (file)
index 0000000..fb31484
--- /dev/null
@@ -0,0 +1,41 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_METAG_L2C=y
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_METAG_HALT_ON_PANIC=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/configs/meta2_smp_defconfig b/arch/metag/configs/meta2_smp_defconfig
new file mode 100644 (file)
index 0000000..6c7b777
--- /dev/null
@@ -0,0 +1,42 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_ELF_CORE is not set
+CONFIG_SLAB=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_METAG_L2C=y
+CONFIG_FLATMEM_MANUAL=y
+CONFIG_METAG_HALT_ON_PANIC=y
+CONFIG_SMP=y
+CONFIG_METAG_DA=y
+CONFIG_HZ_100=y
+CONFIG_DEVTMPFS=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=1
+CONFIG_BLK_DEV_RAM_SIZE=16384
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_DA_TTY=y
+CONFIG_DA_CONSOLE=y
+# CONFIG_DEVKMEM is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_INFO=y
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
new file mode 100644 (file)
index 0000000..6ae0ccb
--- /dev/null
@@ -0,0 +1,54 @@
+generic-y += auxvec.h
+generic-y += bitsperlong.h
+generic-y += bugs.h
+generic-y += clkdev.h
+generic-y += cputime.h
+generic-y += current.h
+generic-y += device.h
+generic-y += dma.h
+generic-y += emergency-restart.h
+generic-y += errno.h
+generic-y += exec.h
+generic-y += fb.h
+generic-y += fcntl.h
+generic-y += futex.h
+generic-y += hardirq.h
+generic-y += hw_irq.h
+generic-y += ioctl.h
+generic-y += ioctls.h
+generic-y += ipcbuf.h
+generic-y += irq_regs.h
+generic-y += kdebug.h
+generic-y += kmap_types.h
+generic-y += kvm_para.h
+generic-y += local.h
+generic-y += local64.h
+generic-y += msgbuf.h
+generic-y += mutex.h
+generic-y += param.h
+generic-y += pci.h
+generic-y += percpu.h
+generic-y += poll.h
+generic-y += posix_types.h
+generic-y += scatterlist.h
+generic-y += sections.h
+generic-y += sembuf.h
+generic-y += serial.h
+generic-y += shmbuf.h
+generic-y += shmparam.h
+generic-y += signal.h
+generic-y += socket.h
+generic-y += sockios.h
+generic-y += stat.h
+generic-y += statfs.h
+generic-y += switch_to.h
+generic-y += termbits.h
+generic-y += termios.h
+generic-y += timex.h
+generic-y += trace_clock.h
+generic-y += types.h
+generic-y += ucontext.h
+generic-y += unaligned.h
+generic-y += user.h
+generic-y += vga.h
+generic-y += xor.h
diff --git a/arch/metag/include/asm/atomic.h b/arch/metag/include/asm/atomic.h
new file mode 100644 (file)
index 0000000..307ecd2
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef __ASM_METAG_ATOMIC_H
+#define __ASM_METAG_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/cmpxchg.h>
+
+#if defined(CONFIG_METAG_ATOMICITY_IRQSOFF)
+/* The simple UP case. */
+#include <asm-generic/atomic.h>
+#else
+
+#if defined(CONFIG_METAG_ATOMICITY_LOCK1)
+#include <asm/atomic_lock1.h>
+#else
+#include <asm/atomic_lnkget.h>
+#endif
+
+#define atomic_add_negative(a, v)       (atomic_add_return((a), (v)) < 0)
+
+#define atomic_dec_return(v) atomic_sub_return(1, (v))
+#define atomic_inc_return(v) atomic_add_return(1, (v))
+
+/*
+ * atomic_inc_and_test - increment and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1
+ * and returns true if the result is zero, or false for all
+ * other cases.
+ */
+#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+
+#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0)
+#define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0)
+
+#define atomic_inc(v) atomic_add(1, (v))
+#define atomic_dec(v) atomic_sub(1, (v))
+
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
+#define smp_mb__before_atomic_dec()    barrier()
+#define smp_mb__after_atomic_dec()     barrier()
+#define smp_mb__before_atomic_inc()    barrier()
+#define smp_mb__after_atomic_inc()     barrier()
+
+#endif
+
+#define atomic_dec_if_positive(v)       atomic_sub_if_positive(1, v)
+
+#include <asm-generic/atomic64.h>
+
+#endif /* __ASM_METAG_ATOMIC_H */
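
To make the layering concrete, here is a rough usage sketch (the example_* names are hypothetical and not part of this series): whichever atomicity backend gets selected, the derived macros above all reduce to the backend's atomic_add_return()/atomic_sub_return().

#include <linux/atomic.h>

/* Hypothetical reference-count drop built purely on the macros above. */
static atomic_t example_refcount = ATOMIC_INIT(1);

static void example_put(void (*release)(void))
{
	/* atomic_dec_and_test() expands to atomic_sub_return(1, v) == 0 */
	if (atomic_dec_and_test(&example_refcount))
		release();
}
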
diff --git a/arch/metag/include/asm/atomic_lnkget.h b/arch/metag/include/asm/atomic_lnkget.h
new file mode 100644 (file)
index 0000000..d2e60a1
--- /dev/null
@@ -0,0 +1,234 @@
+#ifndef __ASM_METAG_ATOMIC_LNKGET_H
+#define __ASM_METAG_ATOMIC_LNKGET_H
+
+#define ATOMIC_INIT(i) { (i) }
+
+#define atomic_set(v, i)               ((v)->counter = (i))
+
+#include <linux/compiler.h>
+
+#include <asm/barrier.h>
+
+/*
+ * None of these asm statements clobber memory, as LNKSET writes around
+ * the cache, so the memory it modifies cannot safely be read by any means
+ * other than these accessors.
+ */
+
+static inline int atomic_read(const atomic_t *v)
+{
+       int temp;
+
+       asm volatile (
+               "LNKGETD %0, [%1]\n"
+               : "=da" (temp)
+               : "da" (&v->counter));
+
+       return temp;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+       int temp;
+
+       asm volatile (
+               "1:     LNKGETD %0, [%1]\n"
+               "       ADD     %0, %0, %2\n"
+               "       LNKSETD [%1], %0\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ     1b\n"
+               : "=&d" (temp)
+               : "da" (&v->counter), "bd" (i)
+               : "cc");
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+       int temp;
+
+       asm volatile (
+               "1:     LNKGETD %0, [%1]\n"
+               "       SUB     %0, %0, %2\n"
+               "       LNKSETD [%1], %0\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ 1b\n"
+               : "=&d" (temp)
+               : "da" (&v->counter), "bd" (i)
+               : "cc");
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+       int result, temp;
+
+       smp_mb();
+
+       asm volatile (
+               "1:     LNKGETD %1, [%2]\n"
+               "       ADD     %1, %1, %3\n"
+               "       LNKSETD [%2], %1\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ 1b\n"
+               : "=&d" (temp), "=&da" (result)
+               : "da" (&v->counter), "bd" (i)
+               : "cc");
+
+       smp_mb();
+
+       return result;
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+       int result, temp;
+
+       smp_mb();
+
+       asm volatile (
+               "1:     LNKGETD %1, [%2]\n"
+               "       SUB     %1, %1, %3\n"
+               "       LNKSETD [%2], %1\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ     1b\n"
+               : "=&d" (temp), "=&da" (result)
+               : "da" (&v->counter), "bd" (i)
+               : "cc");
+
+       smp_mb();
+
+       return result;
+}
+
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+       int temp;
+
+       asm volatile (
+               "1:     LNKGETD %0, [%1]\n"
+               "       AND     %0, %0, %2\n"
+               "       LNKSETD [%1], %0\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ     1b\n"
+               : "=&d" (temp)
+               : "da" (&v->counter), "bd" (~mask)
+               : "cc");
+}
+
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+       int temp;
+
+       asm volatile (
+               "1:     LNKGETD %0, [%1]\n"
+               "       OR      %0, %0, %2\n"
+               "       LNKSETD [%1], %0\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ     1b\n"
+               : "=&d" (temp)
+               : "da" (&v->counter), "bd" (mask)
+               : "cc");
+}
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+       int result, temp;
+
+       smp_mb();
+
+       asm volatile (
+               "1:     LNKGETD %1, [%2]\n"
+               "       CMP     %1, %3\n"
+               "       LNKSETDEQ [%2], %4\n"
+               "       BNE     2f\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ     1b\n"
+               "2:\n"
+               : "=&d" (temp), "=&d" (result)
+               : "da" (&v->counter), "bd" (old), "da" (new)
+               : "cc");
+
+       smp_mb();
+
+       return result;
+}
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+       int temp, old;
+
+       asm volatile (
+               "1:     LNKGETD %1, [%2]\n"
+               "       LNKSETD [%2], %3\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ     1b\n"
+               : "=&d" (temp), "=&d" (old)
+               : "da" (&v->counter), "da" (new)
+               : "cc");
+
+       return old;
+}
+
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+       int result, temp;
+
+       smp_mb();
+
+       asm volatile (
+               "1:     LNKGETD %1, [%2]\n"
+               "       CMP     %1, %3\n"
+               "       ADD     %0, %1, %4\n"
+               "       LNKSETDNE [%2], %0\n"
+               "       BEQ     2f\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ     1b\n"
+               "2:\n"
+               : "=&d" (temp), "=&d" (result)
+               : "da" (&v->counter), "bd" (u), "bd" (a)
+               : "cc");
+
+       smp_mb();
+
+       return result;
+}
+
+static inline int atomic_sub_if_positive(int i, atomic_t *v)
+{
+       int result, temp;
+
+       asm volatile (
+               "1:     LNKGETD %1, [%2]\n"
+               "       SUBS    %1, %1, %3\n"
+               "       LNKSETDGE [%2], %1\n"
+               "       BLT     2f\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ     1b\n"
+               "2:\n"
+               : "=&d" (temp), "=&da" (result)
+               : "da" (&v->counter), "bd" (i)
+               : "cc");
+
+       return result;
+}
+
+#endif /* __ASM_METAG_ATOMIC_LNKGET_H */
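
Each of the asm loops above follows the same load-linked/store-conditional pattern: LNKGETD loads and links the counter, LNKSETD attempts the conditional store, the deferred TXSTAT value is masked down to the LNKSET status field (0x3f000000) and compared against the success code (0x02000000), and the sequence branches back to label 1: until the store succeeds. Restated in C purely for illustration (the real code stays in assembly so the loop remains lock-free; example_atomic_add() is not part of this series), atomic_add() behaves like a cmpxchg retry loop:

static inline void example_atomic_add(int i, atomic_t *v)
{
	int old;

	do {
		old = atomic_read(v);			  /* like LNKGETD */
	} while (atomic_cmpxchg(v, old, old + i) != old); /* like BNZ 1b */
}
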
diff --git a/arch/metag/include/asm/atomic_lock1.h b/arch/metag/include/asm/atomic_lock1.h
new file mode 100644 (file)
index 0000000..e578955
--- /dev/null
@@ -0,0 +1,160 @@
+#ifndef __ASM_METAG_ATOMIC_LOCK1_H
+#define __ASM_METAG_ATOMIC_LOCK1_H
+
+#define ATOMIC_INIT(i) { (i) }
+
+#include <linux/compiler.h>
+
+#include <asm/barrier.h>
+#include <asm/global_lock.h>
+
+static inline int atomic_read(const atomic_t *v)
+{
+       return (v)->counter;
+}
+
+/*
+ * atomic_set needs to take the lock to protect atomic_add_unless from a
+ * possible race, as it reads the counter twice:
+ *
+ *  CPU0                               CPU1
+ *  atomic_add_unless(1, 0)
+ *    ret = v->counter (non-zero)
+ *    if (ret != u)                    v->counter = 0
+ *      v->counter += 1 (counter set to 1)
+ *
+ * Making atomic_set take the lock ensures that ordering and logical
+ * consistency is preserved.
+ */
+static inline int atomic_set(atomic_t *v, int i)
+{
+       unsigned long flags;
+
+       __global_lock1(flags);
+       fence();
+       v->counter = i;
+       __global_unlock1(flags);
+       return i;
+}
+
+static inline void atomic_add(int i, atomic_t *v)
+{
+       unsigned long flags;
+
+       __global_lock1(flags);
+       fence();
+       v->counter += i;
+       __global_unlock1(flags);
+}
+
+static inline void atomic_sub(int i, atomic_t *v)
+{
+       unsigned long flags;
+
+       __global_lock1(flags);
+       fence();
+       v->counter -= i;
+       __global_unlock1(flags);
+}
+
+static inline int atomic_add_return(int i, atomic_t *v)
+{
+       unsigned long result;
+       unsigned long flags;
+
+       __global_lock1(flags);
+       result = v->counter;
+       result += i;
+       fence();
+       v->counter = result;
+       __global_unlock1(flags);
+
+       return result;
+}
+
+static inline int atomic_sub_return(int i, atomic_t *v)
+{
+       unsigned long result;
+       unsigned long flags;
+
+       __global_lock1(flags);
+       result = v->counter;
+       result -= i;
+       fence();
+       v->counter = result;
+       __global_unlock1(flags);
+
+       return result;
+}
+
+static inline void atomic_clear_mask(unsigned int mask, atomic_t *v)
+{
+       unsigned long flags;
+
+       __global_lock1(flags);
+       fence();
+       v->counter &= ~mask;
+       __global_unlock1(flags);
+}
+
+static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
+{
+       unsigned long flags;
+
+       __global_lock1(flags);
+       fence();
+       v->counter |= mask;
+       __global_unlock1(flags);
+}
+
+static inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+       int ret;
+       unsigned long flags;
+
+       __global_lock1(flags);
+       ret = v->counter;
+       if (ret == old) {
+               fence();
+               v->counter = new;
+       }
+       __global_unlock1(flags);
+
+       return ret;
+}
+
+#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline int __atomic_add_unless(atomic_t *v, int a, int u)
+{
+       int ret;
+       unsigned long flags;
+
+       __global_lock1(flags);
+       ret = v->counter;
+       if (ret != u) {
+               fence();
+               v->counter += a;
+       }
+       __global_unlock1(flags);
+
+       return ret;
+}
+
+static inline int atomic_sub_if_positive(int i, atomic_t *v)
+{
+       int ret;
+       unsigned long flags;
+
+       __global_lock1(flags);
+       ret = v->counter - 1;
+       if (ret >= 0) {
+               fence();
+               v->counter = ret;
+       }
+       __global_unlock1(flags);
+
+       return ret;
+}
+
+#endif /* __ASM_METAG_ATOMIC_LOCK1_H */
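
All of the modifying operations in this backend share one shape: take the global voluntary lock so no other hardware thread can enter, issue fence() so the write cannot be externally reordered, update the counter, and unlock. A hypothetical extra operation (example_atomic_and_return(), not part of this series) would follow exactly the same pattern:

static inline int example_atomic_and_return(int i, atomic_t *v)
{
	int result;
	unsigned long flags;

	__global_lock1(flags);	/* serialise against all hardware threads */
	fence();		/* prevent external reordering of the write */
	result = v->counter & i;
	v->counter = result;
	__global_unlock1(flags);

	return result;
}
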
diff --git a/arch/metag/include/asm/barrier.h b/arch/metag/include/asm/barrier.h
new file mode 100644 (file)
index 0000000..c90bfc6
--- /dev/null
@@ -0,0 +1,85 @@
+#ifndef _ASM_METAG_BARRIER_H
+#define _ASM_METAG_BARRIER_H
+
+#include <asm/metag_mem.h>
+
+#define nop()          asm volatile ("NOP")
+#define mb()           wmb()
+#define rmb()          barrier()
+
+#ifdef CONFIG_METAG_META21
+
+/* HTP and above have a system event to fence writes */
+static inline void wr_fence(void)
+{
+       volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_FENCE;
+       barrier();
+       *flushptr = 0;
+}
+
+#else /* CONFIG_METAG_META21 */
+
+/*
+ * ATP doesn't have a system event to fence writes, so it is necessary to flush
+ * the processor write queues as well as possibly the write combiner (depending
+ * on the page being written).
+ * To ensure the write queues are flushed, we do 4 writes to a system event
+ * register (in this case the write combiner flush register), which will also
+ * flush the write combiner.
+ */
+static inline void wr_fence(void)
+{
+       volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_COMBINE_FLUSH;
+       barrier();
+       *flushptr = 0;
+       *flushptr = 0;
+       *flushptr = 0;
+       *flushptr = 0;
+}
+
+#endif /* !CONFIG_METAG_META21 */
+
+static inline void wmb(void)
+{
+       /* flush writes through the write combiner */
+       wr_fence();
+}
+
+#define read_barrier_depends()  do { } while (0)
+
+#ifndef CONFIG_SMP
+#define fence()                do { } while (0)
+#define smp_mb()        barrier()
+#define smp_rmb()       barrier()
+#define smp_wmb()       barrier()
+#else
+
+#ifdef CONFIG_METAG_SMP_WRITE_REORDERING
+/*
+ * Write to the atomic memory unlock system event register (command 0). This is
+ * needed before a write to shared memory in a critical section, to prevent
+ * external reordering of writes before the fence on other threads with writes
+ * after the fence on this thread (and to prevent the ensuing cache-memory
+ * incoherence). It is therefore ineffective if used after and on the same
+ * thread as a write.
+ */
+static inline void fence(void)
+{
+       volatile int *flushptr = (volatile int *) LINSYSEVENT_WR_ATOMIC_UNLOCK;
+       barrier();
+       *flushptr = 0;
+}
+#define smp_mb()        fence()
+#define smp_rmb()       fence()
+#define smp_wmb()       barrier()
+#else
+#define fence()                do { } while (0)
+#define smp_mb()        barrier()
+#define smp_rmb()       barrier()
+#define smp_wmb()       barrier()
+#endif
+#endif
+#define smp_read_barrier_depends()     do { } while (0)
+#define set_mb(var, value) do { var = value; smp_mb(); } while (0)
+
+#endif /* _ASM_METAG_BARRIER_H */
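
A quick pairing sketch (hypothetical example_* symbols, not part of this series): on SMP builds with CONFIG_METAG_SMP_WRITE_REORDERING, smp_mb() and smp_rmb() map to fence() above while smp_wmb() remains a compiler barrier, so the usual producer/consumer ordering idiom still reads as:

static int example_data;
static int example_ready;

static void example_producer(void)
{
	example_data = 42;
	smp_wmb();		/* publish the data before the flag */
	example_ready = 1;
}

static int example_consumer(void)
{
	if (!example_ready)
		return -1;	/* nothing published yet */
	smp_rmb();		/* read the flag before the data */
	return example_data;
}
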
diff --git a/arch/metag/include/asm/bitops.h b/arch/metag/include/asm/bitops.h
new file mode 100644 (file)
index 0000000..c0d0df0
--- /dev/null
@@ -0,0 +1,132 @@
+#ifndef __ASM_METAG_BITOPS_H
+#define __ASM_METAG_BITOPS_H
+
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <asm/global_lock.h>
+
+/*
+ * clear_bit() doesn't provide any barrier for the compiler.
+ */
+#define smp_mb__before_clear_bit()     barrier()
+#define smp_mb__after_clear_bit()      barrier()
+
+#ifdef CONFIG_SMP
+/*
+ * These functions are the basis of our bit ops.
+ */
+static inline void set_bit(unsigned int bit, volatile unsigned long *p)
+{
+       unsigned long flags;
+       unsigned long mask = 1UL << (bit & 31);
+
+       p += bit >> 5;
+
+       __global_lock1(flags);
+       fence();
+       *p |= mask;
+       __global_unlock1(flags);
+}
+
+static inline void clear_bit(unsigned int bit, volatile unsigned long *p)
+{
+       unsigned long flags;
+       unsigned long mask = 1UL << (bit & 31);
+
+       p += bit >> 5;
+
+       __global_lock1(flags);
+       fence();
+       *p &= ~mask;
+       __global_unlock1(flags);
+}
+
+static inline void change_bit(unsigned int bit, volatile unsigned long *p)
+{
+       unsigned long flags;
+       unsigned long mask = 1UL << (bit & 31);
+
+       p += bit >> 5;
+
+       __global_lock1(flags);
+       fence();
+       *p ^= mask;
+       __global_unlock1(flags);
+}
+
+static inline int test_and_set_bit(unsigned int bit, volatile unsigned long *p)
+{
+       unsigned long flags;
+       unsigned long old;
+       unsigned long mask = 1UL << (bit & 31);
+
+       p += bit >> 5;
+
+       __global_lock1(flags);
+       old = *p;
+       if (!(old & mask)) {
+               fence();
+               *p = old | mask;
+       }
+       __global_unlock1(flags);
+
+       return (old & mask) != 0;
+}
+
+static inline int test_and_clear_bit(unsigned int bit,
+                                    volatile unsigned long *p)
+{
+       unsigned long flags;
+       unsigned long old;
+       unsigned long mask = 1UL << (bit & 31);
+
+       p += bit >> 5;
+
+       __global_lock1(flags);
+       old = *p;
+       if (old & mask) {
+               fence();
+               *p = old & ~mask;
+       }
+       __global_unlock1(flags);
+
+       return (old & mask) != 0;
+}
+
+static inline int test_and_change_bit(unsigned int bit,
+                                     volatile unsigned long *p)
+{
+       unsigned long flags;
+       unsigned long old;
+       unsigned long mask = 1UL << (bit & 31);
+
+       p += bit >> 5;
+
+       __global_lock1(flags);
+       fence();
+       old = *p;
+       *p = old ^ mask;
+       __global_unlock1(flags);
+
+       return (old & mask) != 0;
+}
+
+#else
+#include <asm-generic/bitops/atomic.h>
+#endif /* CONFIG_SMP */
+
+#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/find.h>
+#include <asm-generic/bitops/ffs.h>
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/ffz.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif /* __ASM_METAG_BITOPS_H */
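
As a usage sketch (hypothetical example_* names, not part of this series), the SMP-safe test_and_set_bit()/clear_bit() pair above is what a simple "claim a flag once" idiom expands to on Meta:

#include <linux/errno.h>

static unsigned long example_flags;
#define EXAMPLE_FLAG_BUSY	0

static int example_try_claim(void)
{
	/* a non-zero return means the bit was already set: claim failed */
	if (test_and_set_bit(EXAMPLE_FLAG_BUSY, &example_flags))
		return -EBUSY;
	return 0;
}

static void example_release(void)
{
	smp_mb__before_clear_bit();
	clear_bit(EXAMPLE_FLAG_BUSY, &example_flags);
}
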
diff --git a/arch/metag/include/asm/bug.h b/arch/metag/include/asm/bug.h
new file mode 100644 (file)
index 0000000..d04b48c
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _ASM_METAG_BUG_H
+#define _ASM_METAG_BUG_H
+
+#include <asm-generic/bug.h>
+
+struct pt_regs;
+
+extern const char *trap_name(int trapno);
+extern void die(const char *str, struct pt_regs *regs, long err,
+               unsigned long addr) __attribute__ ((noreturn));
+
+#endif
diff --git a/arch/metag/include/asm/cache.h b/arch/metag/include/asm/cache.h
new file mode 100644 (file)
index 0000000..a43b650
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef __ASM_METAG_CACHE_H
+#define __ASM_METAG_CACHE_H
+
+/* L1 cache line size (64 bytes) */
+#define L1_CACHE_SHIFT         6
+#define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
+
+/* Meta requires large data items to be 8 byte aligned. */
+#define ARCH_SLAB_MINALIGN     8
+
+/*
+ * With an L2 cache, we may invalidate dirty lines, so we need to ensure DMA
+ * buffers have cache line alignment.
+ */
+#ifdef CONFIG_METAG_L2C
+#define ARCH_DMA_MINALIGN      L1_CACHE_BYTES
+#else
+#define ARCH_DMA_MINALIGN      8
+#endif
+
+#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+
+#endif
diff --git a/arch/metag/include/asm/cacheflush.h b/arch/metag/include/asm/cacheflush.h
new file mode 100644 (file)
index 0000000..7787ec5
--- /dev/null
@@ -0,0 +1,250 @@
+#ifndef _METAG_CACHEFLUSH_H
+#define _METAG_CACHEFLUSH_H
+
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/io.h>
+
+#include <asm/l2cache.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+
+void metag_cache_probe(void);
+
+void metag_data_cache_flush_all(const void *start);
+void metag_code_cache_flush_all(const void *start);
+
+/*
+ * Routines to flush physical cache lines that may be used to cache data or code
+ * normally accessed via the linear address range supplied. The region flushed
+ * must lie entirely in either local or global address space, as determined by
+ * the top bit of the start address. If bytes is >= 4K then the whole of the
+ * related cache state will be flushed rather than a limited range.
+ */
+void metag_data_cache_flush(const void *start, int bytes);
+void metag_code_cache_flush(const void *start, int bytes);
+
+#ifdef CONFIG_METAG_META12
+
+/* Write through, virtually tagged, split I/D cache. */
+
+static inline void __flush_cache_all(void)
+{
+       metag_code_cache_flush_all((void *) PAGE_OFFSET);
+       metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#define flush_cache_all() __flush_cache_all()
+
+/* flush the entire user address space referenced in this mm structure */
+static inline void flush_cache_mm(struct mm_struct *mm)
+{
+       if (mm == current->mm)
+               __flush_cache_all();
+}
+
+#define flush_cache_dup_mm(mm) flush_cache_mm(mm)
+
+/* flush a range of addresses from this mm */
+static inline void flush_cache_range(struct vm_area_struct *vma,
+                                    unsigned long start, unsigned long end)
+{
+       flush_cache_mm(vma->vm_mm);
+}
+
+static inline void flush_cache_page(struct vm_area_struct *vma,
+                                   unsigned long vmaddr, unsigned long pfn)
+{
+       flush_cache_mm(vma->vm_mm);
+}
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE      1
+static inline void flush_dcache_page(struct page *page)
+{
+       metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#define flush_dcache_mmap_lock(mapping)                do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)      do { } while (0)
+
+static inline void flush_icache_page(struct vm_area_struct *vma,
+                                    struct page *page)
+{
+       metag_code_cache_flush(page_to_virt(page), PAGE_SIZE);
+}
+
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+       metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+static inline void flush_cache_vunmap(unsigned long start, unsigned long end)
+{
+       metag_data_cache_flush_all((void *) PAGE_OFFSET);
+}
+
+#else
+
+/* Write through, physically tagged, split I/D cache. */
+
+#define flush_cache_all()                      do { } while (0)
+#define flush_cache_mm(mm)                     do { } while (0)
+#define flush_cache_dup_mm(mm)                 do { } while (0)
+#define flush_cache_range(vma, start, end)     do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)     do { } while (0)
+#define flush_dcache_mmap_lock(mapping)                do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)      do { } while (0)
+#define flush_icache_page(vma, pg)             do { } while (0)
+#define flush_cache_vmap(start, end)           do { } while (0)
+#define flush_cache_vunmap(start, end)         do { } while (0)
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE      1
+static inline void flush_dcache_page(struct page *page)
+{
+       /* FIXME: We can do better than this. All we are trying to do is
+        * make the i-cache coherent, we should use the PG_arch_1 bit like
+        * e.g. powerpc.
+        */
+#ifdef CONFIG_SMP
+       metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+       metag_code_cache_flush_all((void *) PAGE_OFFSET);
+#endif
+}
+
+#endif
+
+/* Push n pages at kernel virtual address and clear the icache */
+static inline void flush_icache_range(unsigned long address,
+                                     unsigned long endaddr)
+{
+#ifdef CONFIG_SMP
+       metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+       metag_code_cache_flush((void *) address, endaddr - address);
+#endif
+}
+
+static inline void flush_cache_sigtramp(unsigned long addr, int size)
+{
+       /*
+        * Flush the icache in case there was previously some code
+        * fetched from this address, perhaps a previous sigtramp.
+        *
+        * We don't need to flush the dcache, it's write through and
+        * we just wrote the sigtramp code through it.
+        */
+#ifdef CONFIG_SMP
+       metag_out32(1, SYSC_ICACHE_FLUSH);
+#else
+       metag_code_cache_flush((void *) addr, size);
+#endif
+}
+
+#ifdef CONFIG_METAG_L2C
+
+/*
+ * Perform a single specific CACHEWD operation on an address, masking lower bits
+ * of address first.
+ */
+static inline void cachewd_line(void *addr, unsigned int data)
+{
+       unsigned long masked = (unsigned long)addr & -0x40;
+       __builtin_meta2_cachewd((void *)masked, data);
+}
+
+/* Perform a certain CACHEW op on each cache line in a range */
+static inline void cachew_region_op(void *start, unsigned long size,
+                                   unsigned int op)
+{
+       unsigned long offset = (unsigned long)start & 0x3f;
+       int i;
+       if (offset) {
+               size += offset;
+               start -= offset;
+       }
+       i = (size - 1) >> 6;
+       do {
+               __builtin_meta2_cachewd(start, op);
+               start += 0x40;
+       } while (i--);
+}
+
+/* prevent write fence and flushbacks being reordered in L2 */
+static inline void l2c_fence_flush(void *addr)
+{
+       /*
+        * Synchronise by reading back and re-flushing.
+        * It is assumed this access will miss, as the caller should have just
+        * flushed the cache line.
+        */
+       (void)*(volatile u8 *)addr;
+       cachewd_line(addr, CACHEW_FLUSH_L1D_L2);
+}
+
+/* prevent write fence and writebacks being reordered in L2 */
+static inline void l2c_fence(void *addr)
+{
+       /*
+        * A write back has occurred, but not necessarily an invalidate, so the
+        * readback in l2c_fence_flush() would hit in the cache and have no
+        * effect. Therefore fully flush the line first.
+        */
+       cachewd_line(addr, CACHEW_FLUSH_L1D_L2);
+       l2c_fence_flush(addr);
+}
+
+/* Used to keep memory consistent when doing DMA. */
+static inline void flush_dcache_region(void *start, unsigned long size)
+{
+       /* metag_data_cache_flush won't flush L2 cache lines if size >= 4096 */
+       if (meta_l2c_is_enabled()) {
+               cachew_region_op(start, size, CACHEW_FLUSH_L1D_L2);
+               if (meta_l2c_is_writeback())
+                       l2c_fence_flush(start + size - 1);
+       } else {
+               metag_data_cache_flush(start, size);
+       }
+}
+
+/* Write back dirty lines to memory (or do nothing if no writeback caches) */
+static inline void writeback_dcache_region(void *start, unsigned long size)
+{
+       if (meta_l2c_is_enabled() && meta_l2c_is_writeback()) {
+               cachew_region_op(start, size, CACHEW_WRITEBACK_L1D_L2);
+               l2c_fence(start + size - 1);
+       }
+}
+
+/* Invalidate (may also write back if necessary) */
+static inline void invalidate_dcache_region(void *start, unsigned long size)
+{
+       if (meta_l2c_is_enabled())
+               cachew_region_op(start, size, CACHEW_INVALIDATE_L1D_L2);
+       else
+               metag_data_cache_flush(start, size);
+}
+#else
+#define flush_dcache_region(s, l)      metag_data_cache_flush((s), (l))
+#define writeback_dcache_region(s, l)  do {} while (0)
+#define invalidate_dcache_region(s, l) flush_dcache_region((s), (l))
+#endif
+
+static inline void copy_to_user_page(struct vm_area_struct *vma,
+                                    struct page *page, unsigned long vaddr,
+                                    void *dst, const void *src,
+                                    unsigned long len)
+{
+       memcpy(dst, src, len);
+       flush_icache_range((unsigned long)dst, (unsigned long)dst + len);
+}
+
+static inline void copy_from_user_page(struct vm_area_struct *vma,
+                                      struct page *page, unsigned long vaddr,
+                                      void *dst, const void *src,
+                                      unsigned long len)
+{
+       memcpy(dst, src, len);
+}
+
+#endif /* _METAG_CACHEFLUSH_H */
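
A minimal driver-side sketch of the DMA helpers above (the example_* names and buffer handling are hypothetical): flush before the device reads a buffer, invalidate before the CPU reads what the device wrote.

static void example_dma_to_device(void *buf, unsigned long len)
{
	/* write back (and drop) any cached copy so the device sees it */
	flush_dcache_region(buf, len);
}

static void example_dma_from_device(void *buf, unsigned long len)
{
	/* discard stale lines before the CPU reads the device's data */
	invalidate_dcache_region(buf, len);
}
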
diff --git a/arch/metag/include/asm/cachepart.h b/arch/metag/include/asm/cachepart.h
new file mode 100644 (file)
index 0000000..cf6b44e
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * Meta cache partition manipulation.
+ *
+ * Copyright 2010 Imagination Technologies Ltd.
+ */
+
+#ifndef _METAG_CACHEPART_H_
+#define _METAG_CACHEPART_H_
+
+/**
+ * get_dcache_size() - Get size of data cache.
+ */
+unsigned int get_dcache_size(void);
+
+/**
+ * get_icache_size() - Get size of code cache.
+ */
+unsigned int get_icache_size(void);
+
+/**
+ * get_global_dcache_size() - Get the thread's global dcache.
+ *
+ * Returns the size of the current thread's global dcache partition.
+ */
+unsigned int get_global_dcache_size(void);
+
+/**
+ * get_global_icache_size() - Get the thread's global icache.
+ *
+ * Returns the size of the current thread's global icache partition.
+ */
+unsigned int get_global_icache_size(void);
+
+/**
+ * check_for_cache_aliasing() - Ensure that the bootloader has configured the
+ * dcache and icache properly to avoid aliasing
+ * @thread_id: Hardware thread ID
+ *
+ */
+void check_for_cache_aliasing(int thread_id);
+
+#endif
diff --git a/arch/metag/include/asm/checksum.h b/arch/metag/include/asm/checksum.h
new file mode 100644 (file)
index 0000000..999bf76
--- /dev/null
@@ -0,0 +1,92 @@
+#ifndef _METAG_CHECKSUM_H
+#define _METAG_CHECKSUM_H
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy(const void *src, void *dst, int len,
+                               __wsum sum);
+
+/*
+ * the same as csum_partial_copy, but copies from user space.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+                                       int len, __wsum sum, int *csum_err);
+
+#define csum_partial_copy_nocheck(src, dst, len, sum)  \
+       csum_partial_copy((src), (dst), (len), (sum))
+
+/*
+ * Fold a partial checksum
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+       u32 sum = (__force u32)csum;
+       sum = (sum & 0xffff) + (sum >> 16);
+       sum = (sum & 0xffff) + (sum >> 16);
+       return (__force __sum16)~sum;
+}
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ */
+extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+                                       unsigned short len,
+                                       unsigned short proto,
+                                       __wsum sum)
+{
+       unsigned long len_proto = (proto + len) << 8;
+       asm ("ADD    %0, %0, %1\n"
+            "ADDS   %0, %0, %2\n"
+            "ADDCS  %0, %0, #1\n"
+            "ADDS   %0, %0, %3\n"
+            "ADDCS  %0, %0, #1\n"
+            : "=d" (sum)
+            : "d" (daddr), "d" (saddr), "d" (len_proto),
+              "0" (sum)
+            : "cc");
+       return sum;
+}
+
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len,
+                 unsigned short proto, __wsum sum)
+{
+       return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+extern __sum16 ip_compute_csum(const void *buff, int len);
+
+#endif /* _METAG_CHECKSUM_H */
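
For illustration (example_udp_csum() is hypothetical and not part of this series), the pieces above compose in the usual way: checksum the payload with csum_partial(), then fold in the pseudo-header with csum_tcpudp_magic():

static __sum16 example_udp_csum(__be32 saddr, __be32 daddr,
				const void *payload, unsigned short len)
{
	__wsum sum = csum_partial(payload, len, 0);

	return csum_tcpudp_magic(saddr, daddr, len, 17 /* IPPROTO_UDP */, sum);
}
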
diff --git a/arch/metag/include/asm/clock.h b/arch/metag/include/asm/clock.h
new file mode 100644 (file)
index 0000000..3e2915a
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * arch/metag/include/asm/clock.h
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _METAG_CLOCK_H_
+#define _METAG_CLOCK_H_
+
+#include <asm/mach/arch.h>
+
+/**
+ * struct meta_clock_desc - Meta Core clock callbacks.
+ * @get_core_freq:     Get the frequency of the Meta core. If this is NULL, the
+ *                     core frequency will be determined like this:
+ *                     Meta 1: based on loops_per_jiffy.
+ *                     Meta 2: (EXPAND_TIMER_DIV + 1) MHz.
+ */
+struct meta_clock_desc {
+       unsigned long           (*get_core_freq)(void);
+};
+
+extern struct meta_clock_desc _meta_clock;
+
+/*
+ * Set up the default clock, ensuring all callbacks are valid - only accessible
+ * during boot.
+ */
+void setup_meta_clocks(struct meta_clock_desc *desc);
+
+/**
+ * get_coreclock() - Get the frequency of the Meta core clock.
+ *
+ * Returns:    The Meta core clock frequency in Hz.
+ */
+static inline unsigned long get_coreclock(void)
+{
+       /*
+        * Use the current clock callback. If set correctly, this will provide
+        * the most accurate frequency, as it can be calculated directly from
+        * the PLL configuration. Otherwise a default callback will have been
+        * set instead.
+        */
+       return _meta_clock.get_core_freq();
+}
+
+#endif /* _METAG_CLOCK_H_ */
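
A hypothetical SoC setup sketch (the names and the 320 MHz figure are made up) showing how a machine description would hand its frequency callback to setup_meta_clocks() at boot:

static unsigned long example_get_core_freq(void)
{
	return 320 * 1000 * 1000;	/* illustrative fixed frequency */
}

static struct meta_clock_desc example_clocks = {
	.get_core_freq = example_get_core_freq,
};

static void example_setup_clocks(void)
{
	setup_meta_clocks(&example_clocks);
}
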
diff --git a/arch/metag/include/asm/cmpxchg.h b/arch/metag/include/asm/cmpxchg.h
new file mode 100644 (file)
index 0000000..b1bc1be
--- /dev/null
@@ -0,0 +1,65 @@
+#ifndef __ASM_METAG_CMPXCHG_H
+#define __ASM_METAG_CMPXCHG_H
+
+#include <asm/barrier.h>
+
+#if defined(CONFIG_METAG_ATOMICITY_IRQSOFF)
+#include <asm/cmpxchg_irq.h>
+#elif defined(CONFIG_METAG_ATOMICITY_LOCK1)
+#include <asm/cmpxchg_lock1.h>
+#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
+#include <asm/cmpxchg_lnkget.h>
+#endif
+
+extern void __xchg_called_with_bad_pointer(void);
+
+#define __xchg(ptr, x, size)                           \
+({                                                     \
+       unsigned long __xchg__res;                      \
+       volatile void *__xchg_ptr = (ptr);              \
+       switch (size) {                                 \
+       case 4:                                         \
+               __xchg__res = xchg_u32(__xchg_ptr, x);  \
+               break;                                  \
+       case 1:                                         \
+               __xchg__res = xchg_u8(__xchg_ptr, x);   \
+               break;                                  \
+       default:                                        \
+               __xchg_called_with_bad_pointer();       \
+               __xchg__res = x;                        \
+               break;                                  \
+       }                                               \
+                                                       \
+       __xchg__res;                                    \
+})
+
+#define xchg(ptr, x)   \
+       ((__typeof__(*(ptr)))__xchg((ptr), (unsigned long)(x), sizeof(*(ptr))))
+
+/* This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid cmpxchg(). */
+extern void __cmpxchg_called_with_bad_pointer(void);
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+                                     unsigned long new, int size)
+{
+       switch (size) {
+       case 4:
+               return __cmpxchg_u32(ptr, old, new);
+       }
+       __cmpxchg_called_with_bad_pointer();
+       return old;
+}
+
+#define __HAVE_ARCH_CMPXCHG 1
+
+#define cmpxchg(ptr, o, n)                                             \
+       ({                                                              \
+               __typeof__(*(ptr)) _o_ = (o);                           \
+               __typeof__(*(ptr)) _n_ = (n);                           \
+               (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \
+                                              (unsigned long)_n_,      \
+                                              sizeof(*(ptr)));         \
+       })
+
+#endif /* __ASM_METAG_CMPXCHG_H */
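
As a sketch of the cmpxchg() macro in use (example_inc_below() is hypothetical, not part of this series), the usual compare-and-swap retry loop works unchanged regardless of which backend header was selected above:

#include <linux/compiler.h>

static int example_inc_below(int *counter, int limit)
{
	int old, new;

	do {
		old = ACCESS_ONCE(*counter);
		if (old >= limit)
			return -1;	/* already at the limit */
		new = old + 1;
	} while (cmpxchg(counter, old, new) != old);

	return new;
}
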
diff --git a/arch/metag/include/asm/cmpxchg_irq.h b/arch/metag/include/asm/cmpxchg_irq.h
new file mode 100644 (file)
index 0000000..6495731
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef __ASM_METAG_CMPXCHG_IRQ_H
+#define __ASM_METAG_CMPXCHG_IRQ_H
+
+#include <linux/irqflags.h>
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+       unsigned long flags, retval;
+
+       local_irq_save(flags);
+       retval = *m;
+       *m = val;
+       local_irq_restore(flags);
+       return retval;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+       unsigned long flags, retval;
+
+       local_irq_save(flags);
+       retval = *m;
+       *m = val & 0xff;
+       local_irq_restore(flags);
+       return retval;
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+                                         unsigned long new)
+{
+       __u32 retval;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       retval = *m;
+       if (retval == old)
+               *m = new;
+       local_irq_restore(flags);       /* implies memory barrier  */
+       return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_IRQ_H */
diff --git a/arch/metag/include/asm/cmpxchg_lnkget.h b/arch/metag/include/asm/cmpxchg_lnkget.h
new file mode 100644 (file)
index 0000000..0154e28
--- /dev/null
@@ -0,0 +1,86 @@
+#ifndef __ASM_METAG_CMPXCHG_LNKGET_H
+#define __ASM_METAG_CMPXCHG_LNKGET_H
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+       int temp, old;
+
+       smp_mb();
+
+       asm volatile (
+                     "1:       LNKGETD %1, [%2]\n"
+                     " LNKSETD [%2], %3\n"
+                     " DEFR    %0, TXSTAT\n"
+                     " ANDT    %0, %0, #HI(0x3f000000)\n"
+                     " CMPT    %0, #HI(0x02000000)\n"
+                     " BNZ     1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+                     " DCACHE  [%2], %0\n"
+#endif
+                     : "=&d" (temp), "=&d" (old)
+                     : "da" (m), "da" (val)
+                     : "cc"
+                     );
+
+       smp_mb();
+
+       return old;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+       int temp, old;
+
+       smp_mb();
+
+       asm volatile (
+                     "1:       LNKGETD %1, [%2]\n"
+                     " LNKSETD [%2], %3\n"
+                     " DEFR    %0, TXSTAT\n"
+                     " ANDT    %0, %0, #HI(0x3f000000)\n"
+                     " CMPT    %0, #HI(0x02000000)\n"
+                     " BNZ     1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+                     " DCACHE  [%2], %0\n"
+#endif
+                     : "=&d" (temp), "=&d" (old)
+                     : "da" (m), "da" (val & 0xff)
+                     : "cc"
+                     );
+
+       smp_mb();
+
+       return old;
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+                                         unsigned long new)
+{
+       __u32 retval, temp;
+
+       smp_mb();
+
+       asm volatile (
+                     "1:       LNKGETD %1, [%2]\n"
+                     " CMP     %1, %3\n"
+                     " LNKSETDEQ [%2], %4\n"
+                     " BNE     2f\n"
+                     " DEFR    %0, TXSTAT\n"
+                     " ANDT    %0, %0, #HI(0x3f000000)\n"
+                     " CMPT    %0, #HI(0x02000000)\n"
+                     " BNZ     1b\n"
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+                     " DCACHE  [%2], %0\n"
+#endif
+                     "2:\n"
+                     : "=&d" (temp), "=&da" (retval)
+                     : "da" (m), "bd" (old), "da" (new)
+                     : "cc"
+                     );
+
+       smp_mb();
+
+       return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_LNKGET_H */
diff --git a/arch/metag/include/asm/cmpxchg_lock1.h b/arch/metag/include/asm/cmpxchg_lock1.h
new file mode 100644 (file)
index 0000000..fd68504
--- /dev/null
@@ -0,0 +1,48 @@
+#ifndef __ASM_METAG_CMPXCHG_LOCK1_H
+#define __ASM_METAG_CMPXCHG_LOCK1_H
+
+#include <asm/global_lock.h>
+
+/* Use LOCK2 as these have to be atomic w.r.t. ordinary accesses. */
+
+static inline unsigned long xchg_u32(volatile u32 *m, unsigned long val)
+{
+       unsigned long flags, retval;
+
+       __global_lock2(flags);
+       fence();
+       retval = *m;
+       *m = val;
+       __global_unlock2(flags);
+       return retval;
+}
+
+static inline unsigned long xchg_u8(volatile u8 *m, unsigned long val)
+{
+       unsigned long flags, retval;
+
+       __global_lock2(flags);
+       fence();
+       retval = *m;
+       *m = val & 0xff;
+       __global_unlock2(flags);
+       return retval;
+}
+
+static inline unsigned long __cmpxchg_u32(volatile int *m, unsigned long old,
+                                         unsigned long new)
+{
+       __u32 retval;
+       unsigned long flags;
+
+       __global_lock2(flags);
+       retval = *m;
+       if (retval == old) {
+               fence();
+               *m = new;
+       }
+       __global_unlock2(flags);
+       return retval;
+}
+
+#endif /* __ASM_METAG_CMPXCHG_LOCK1_H */
diff --git a/arch/metag/include/asm/core_reg.h b/arch/metag/include/asm/core_reg.h
new file mode 100644 (file)
index 0000000..bdbc3a5
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef __ASM_METAG_CORE_REG_H_
+#define __ASM_METAG_CORE_REG_H_
+
+#include <asm/metag_regs.h>
+
+extern void core_reg_write(int unit, int reg, int thread, unsigned int val);
+extern unsigned int core_reg_read(int unit, int reg, int thread);
+
+/*
+ * These macros allow direct access from C to any register known to the
+ * assembler. Example candidates are TXTACTCYC, TXIDLECYC, and TXPRIVEXT.
+ */
+
+#define __core_reg_get(reg) ({                                         \
+       unsigned int __grvalue;                                         \
+       asm volatile("MOV       %0," #reg                               \
+                    : "=r" (__grvalue));                               \
+       __grvalue;                                                      \
+})
+
+#define __core_reg_set(reg, value) do {                                        \
+       unsigned int __srvalue = (value);                               \
+       asm volatile("MOV       " #reg ",%0"                            \
+                    :                                                  \
+                    : "r" (__srvalue));                                \
+} while (0)
+
+#define __core_reg_swap(reg, value) do {                               \
+       unsigned int __srvalue = (value);                               \
+       asm volatile("SWAP      " #reg ",%0"                            \
+                    : "+r" (__srvalue));                               \
+       (value) = __srvalue;                                            \
+} while (0)
+
+#endif
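
As the comment above suggests, registers such as TXTACTCYC can be read directly from C; a one-line hypothetical helper (not part of this series) would be:

static inline unsigned int example_active_cycles(void)
{
	/* TXTACTCYC counts cycles on which this thread was active */
	return __core_reg_get(TXTACTCYC);
}
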
diff --git a/arch/metag/include/asm/cpu.h b/arch/metag/include/asm/cpu.h
new file mode 100644 (file)
index 0000000..decf129
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef _ASM_METAG_CPU_H
+#define _ASM_METAG_CPU_H
+
+#include <linux/percpu.h>
+
+struct cpuinfo_metag {
+       struct cpu cpu;
+#ifdef CONFIG_SMP
+       unsigned long loops_per_jiffy;
+#endif
+};
+
+DECLARE_PER_CPU(struct cpuinfo_metag, cpu_data);
+#endif /* _ASM_METAG_CPU_H */
diff --git a/arch/metag/include/asm/da.h b/arch/metag/include/asm/da.h
new file mode 100644 (file)
index 0000000..81bd521
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Meta DA JTAG debugger control.
+ *
+ * Copyright 2012 Imagination Technologies Ltd.
+ */
+
+#ifndef _METAG_DA_H_
+#define _METAG_DA_H_
+
+#ifdef CONFIG_METAG_DA
+
+#include <linux/init.h>
+#include <linux/types.h>
+
+extern bool _metag_da_present;
+
+/**
+ * metag_da_enabled() - Find whether a DA is currently enabled.
+ *
+ * Returns:    true if a DA was detected, false if not.
+ */
+static inline bool metag_da_enabled(void)
+{
+       return _metag_da_present;
+}
+
+/**
+ * metag_da_probe() - Try and detect a connected DA.
+ *
+ * This is used at start up to detect whether a DA is active.
+ *
+ * Returns:    0 on detection, -err otherwise.
+ */
+int __init metag_da_probe(void);
+
+#else /* !CONFIG_METAG_DA */
+
+#define metag_da_enabled() false
+#define metag_da_probe() do {} while (0)
+
+#endif
+
+#endif /* _METAG_DA_H_ */
diff --git a/arch/metag/include/asm/delay.h b/arch/metag/include/asm/delay.h
new file mode 100644 (file)
index 0000000..9c92f99
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef _METAG_DELAY_H
+#define _METAG_DELAY_H
+
+/*
+ * Copyright (C) 1993 Linus Torvalds
+ *
+ * Delay routines calling functions in arch/metag/lib/delay.c
+ */
+
+/* Undefined functions to get compile-time errors */
+extern void __bad_udelay(void);
+extern void __bad_ndelay(void);
+
+extern void __udelay(unsigned long usecs);
+extern void __ndelay(unsigned long nsecs);
+extern void __const_udelay(unsigned long xloops);
+extern void __delay(unsigned long loops);
+
+/* 0x10c7 is 2**32 / 1000000 (rounded up) */
+#define udelay(n) (__builtin_constant_p(n) ? \
+       ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \
+       __udelay(n))
+
+/* 0x5 is 2**32 / 1000000000 (rounded up) */
+#define ndelay(n) (__builtin_constant_p(n) ? \
+       ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
+       __ndelay(n))
+
+#endif /* _METAG_DELAY_H */
diff --git a/arch/metag/include/asm/div64.h b/arch/metag/include/asm/div64.h
new file mode 100644 (file)
index 0000000..0fdd116
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef __ASM_DIV64_H__
+#define __ASM_DIV64_H__
+
+#include <asm-generic/div64.h>
+
+extern u64 div_u64(u64 dividend, u64 divisor);
+extern s64 div_s64(s64 dividend, s64 divisor);
+
+#define div_u64 div_u64
+#define div_s64 div_s64
+
+#endif
diff --git a/arch/metag/include/asm/dma-mapping.h b/arch/metag/include/asm/dma-mapping.h
new file mode 100644 (file)
index 0000000..14b23ef
--- /dev/null
@@ -0,0 +1,190 @@
+#ifndef _ASM_METAG_DMA_MAPPING_H
+#define _ASM_METAG_DMA_MAPPING_H
+
+#include <linux/mm.h>
+
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <linux/scatterlist.h>
+#include <asm/bug.h>
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+
+void *dma_alloc_coherent(struct device *dev, size_t size,
+                        dma_addr_t *dma_handle, gfp_t flag);
+
+void dma_free_coherent(struct device *dev, size_t size,
+                      void *vaddr, dma_addr_t dma_handle);
+
+void dma_sync_for_device(void *vaddr, size_t size, int dma_direction);
+void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction);
+
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+                     void *cpu_addr, dma_addr_t dma_addr, size_t size);
+
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+                         void *cpu_addr, dma_addr_t dma_addr, size_t size);
+
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *ptr, size_t size,
+              enum dma_data_direction direction)
+{
+       BUG_ON(!valid_dma_direction(direction));
+       WARN_ON(size == 0);
+       dma_sync_for_device(ptr, size, direction);
+       return virt_to_phys(ptr);
+}
+
+static inline void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+                enum dma_data_direction direction)
+{
+       BUG_ON(!valid_dma_direction(direction));
+       dma_sync_for_cpu(phys_to_virt(dma_addr), size, direction);
+}
+
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents,
+          enum dma_data_direction direction)
+{
+       struct scatterlist *sg;
+       int i;
+
+       BUG_ON(!valid_dma_direction(direction));
+       WARN_ON(nents == 0 || sglist[0].length == 0);
+
+       for_each_sg(sglist, sg, nents, i) {
+               BUG_ON(!sg_page(sg));
+
+               sg->dma_address = sg_phys(sg);
+               dma_sync_for_device(sg_virt(sg), sg->length, direction);
+       }
+
+       return nents;
+}
+
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page, unsigned long offset,
+            size_t size, enum dma_data_direction direction)
+{
+       BUG_ON(!valid_dma_direction(direction));
+       dma_sync_for_device((void *)(page_to_phys(page) + offset), size,
+                           direction);
+       return page_to_phys(page) + offset;
+}
+
+static inline void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+              enum dma_data_direction direction)
+{
+       BUG_ON(!valid_dma_direction(direction));
+       dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
+}
+
+
+static inline void
+dma_unmap_sg(struct device *dev, struct scatterlist *sglist, int nhwentries,
+            enum dma_data_direction direction)
+{
+       struct scatterlist *sg;
+       int i;
+
+       BUG_ON(!valid_dma_direction(direction));
+       WARN_ON(nhwentries == 0 || sglist[0].length == 0);
+
+       for_each_sg(sglist, sg, nhwentries, i) {
+               BUG_ON(!sg_page(sg));
+
+               sg->dma_address = sg_phys(sg);
+               dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
+       }
+}
+
+static inline void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+                       enum dma_data_direction direction)
+{
+       dma_sync_for_cpu(phys_to_virt(dma_handle), size, direction);
+}
+
+static inline void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
+                          size_t size, enum dma_data_direction direction)
+{
+       dma_sync_for_device(phys_to_virt(dma_handle), size, direction);
+}
+
+static inline void
+dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle,
+                             unsigned long offset, size_t size,
+                             enum dma_data_direction direction)
+{
+       dma_sync_for_cpu(phys_to_virt(dma_handle)+offset, size,
+                        direction);
+}
+
+static inline void
+dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle,
+                                unsigned long offset, size_t size,
+                                enum dma_data_direction direction)
+{
+       dma_sync_for_device(phys_to_virt(dma_handle)+offset, size,
+                           direction);
+}
+
+static inline void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+                   enum dma_data_direction direction)
+{
+       int i;
+       for (i = 0; i < nelems; i++, sg++)
+               dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
+}
+
+static inline void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+                      enum dma_data_direction direction)
+{
+       int i;
+       for (i = 0; i < nelems; i++, sg++)
+               dma_sync_for_device(sg_virt(sg), sg->length, direction);
+}
+
+static inline int
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+       return 0;
+}
+
+#define dma_supported(dev, mask)        (1)
+
+static inline int
+dma_set_mask(struct device *dev, u64 mask)
+{
+       if (!dev->dma_mask || !dma_supported(dev, mask))
+               return -EIO;
+
+       *dev->dma_mask = mask;
+
+       return 0;
+}
+
+/*
+ * dma_alloc_noncoherent() returns non-cacheable memory, so there's no need to
+ * do any flushing here.
+ */
+static inline void
+dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+              enum dma_data_direction direction)
+{
+}
+
+/* drivers/base/dma-mapping.c */
+extern int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
+                                 void *cpu_addr, dma_addr_t dma_addr,
+                                 size_t size);
+
+#define dma_get_sgtable(d, t, v, h, s) dma_common_get_sgtable(d, t, v, h, s)
+
+#endif
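
A hypothetical driver sketch of the streaming API above (example_start_tx() and the DMA engine are made up): since there is no IOMMU, dma_map_single() simply flushes the buffer for the device and returns its physical address.

static int example_start_tx(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;

	handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle))
		return -EIO;

	/* ... program the (hypothetical) DMA engine with 'handle' ... */

	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
	return 0;
}
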
diff --git a/arch/metag/include/asm/elf.h b/arch/metag/include/asm/elf.h
new file mode 100644 (file)
index 0000000..d63b9d0
--- /dev/null
@@ -0,0 +1,128 @@
+#ifndef __ASM_METAG_ELF_H
+#define __ASM_METAG_ELF_H
+
+#define EM_METAG      174
+
+/* Meta relocations */
+#define R_METAG_HIADDR16                 0
+#define R_METAG_LOADDR16                 1
+#define R_METAG_ADDR32                   2
+#define R_METAG_NONE                     3
+#define R_METAG_RELBRANCH                4
+#define R_METAG_GETSETOFF                5
+
+/* Backward compatibility */
+#define R_METAG_REG32OP1                 6
+#define R_METAG_REG32OP2                 7
+#define R_METAG_REG32OP3                 8
+#define R_METAG_REG16OP1                 9
+#define R_METAG_REG16OP2                10
+#define R_METAG_REG16OP3                11
+#define R_METAG_REG32OP4                12
+
+#define R_METAG_HIOG                    13
+#define R_METAG_LOOG                    14
+
+/* GNU */
+#define R_METAG_GNU_VTINHERIT           30
+#define R_METAG_GNU_VTENTRY             31
+
+/* PIC relocations */
+#define R_METAG_HI16_GOTOFF             32
+#define R_METAG_LO16_GOTOFF             33
+#define R_METAG_GETSET_GOTOFF           34
+#define R_METAG_GETSET_GOT              35
+#define R_METAG_HI16_GOTPC              36
+#define R_METAG_LO16_GOTPC              37
+#define R_METAG_HI16_PLT                38
+#define R_METAG_LO16_PLT                39
+#define R_METAG_RELBRANCH_PLT           40
+#define R_METAG_GOTOFF                  41
+#define R_METAG_PLT                     42
+#define R_METAG_COPY                    43
+#define R_METAG_JMP_SLOT                44
+#define R_METAG_RELATIVE                45
+#define R_METAG_GLOB_DAT                46
+
+/*
+ * ELF register definitions.
+ */
+
+#include <asm/page.h>
+#include <asm/processor.h>
+#include <asm/ptrace.h>
+#include <asm/user.h>
+
+typedef unsigned long elf_greg_t;
+
+#define ELF_NGREG (sizeof(struct user_gp_regs) / sizeof(elf_greg_t))
+typedef elf_greg_t elf_gregset_t[ELF_NGREG];
+
+typedef unsigned long elf_fpregset_t;
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) ((x)->e_machine == EM_METAG)
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS      ELFCLASS32
+#define ELF_DATA       ELFDATA2LSB
+#define ELF_ARCH       EM_METAG
+
+#define ELF_PLAT_INIT(_r, load_addr)   \
+       do { _r->ctx.AX[0].U0 = 0; } while (0)
+
+#define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
+#define ELF_EXEC_PAGESIZE      PAGE_SIZE
+
+/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
+   use of this is to invoke "./ld.so someprog" to test out a new version of
+   the loader.  We need to make sure that it is out of the way of the program
+   that it will "exec", and that there is sufficient room for the brk.  */
+
+#define ELF_ET_DYN_BASE         0x08000000UL
+
+#define ELF_CORE_COPY_REGS(_dest, _regs)                       \
+       memcpy((char *)&_dest, (char *)_regs, sizeof(struct pt_regs));
+
+/* This yields a mask that user programs can use to figure out what
+   instruction set this cpu supports.  */
+
+#define ELF_HWCAP      (0)
+
+/* This yields a string that ld.so will use to load implementation
+   specific libraries for optimization.  This is more specific in
+   intent than poking at uname or /proc/cpuinfo.  */
+
+#define ELF_PLATFORM  (NULL)
+
+#define SET_PERSONALITY(ex) \
+       set_personality(PER_LINUX | (current->personality & (~PER_MASK)))
+
+#define STACK_RND_MASK (0)
+
+#ifdef CONFIG_METAG_USER_TCM
+
+struct elf32_phdr;
+struct file;
+
+unsigned long __metag_elf_map(struct file *filep, unsigned long addr,
+                             struct elf32_phdr *eppnt, int prot, int type,
+                             unsigned long total_size);
+
+static inline unsigned long metag_elf_map(struct file *filep,
+                                         unsigned long addr,
+                                         struct elf32_phdr *eppnt, int prot,
+                                         int type, unsigned long total_size)
+{
+       return __metag_elf_map(filep, addr, eppnt, prot, type, total_size);
+}
+#define elf_map metag_elf_map
+
+#endif
+
+#endif
diff --git a/arch/metag/include/asm/fixmap.h b/arch/metag/include/asm/fixmap.h
new file mode 100644 (file)
index 0000000..3331275
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * fixmap.h: compile-time virtual memory allocation
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998 Ingo Molnar
+ *
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
+ */
+
+#ifndef _ASM_FIXMAP_H
+#define _ASM_FIXMAP_H
+
+#include <asm/pgtable.h>
+#ifdef CONFIG_HIGHMEM
+#include <linux/threads.h>
+#include <asm/kmap_types.h>
+#endif
+
+/*
+ * Here we define all the compile-time 'special' virtual
+ * addresses. The point is to have a constant address at
+ * compile time, but to set the physical address only
+ * in the boot process. We allocate these special addresses
+ * from the end of the consistent memory region backwards.
+ * Also this lets us do fail-safe vmalloc(), we
+ * can guarantee that these special addresses and
+ * vmalloc()-ed addresses never overlap.
+ *
+ * These 'compile-time allocated' memory buffers are
+ * fixed-size 4k pages (or larger if used with an increment
+ * higher than 1). Use fixmap_set(idx, phys) to associate
+ * physical memory with fixmap indices.
+ *
+ * TLB entries of such buffers will not be flushed across
+ * task switches.
+ */
+enum fixed_addresses {
+#define FIX_N_COLOURS 8
+#ifdef CONFIG_HIGHMEM
+       /* reserved pte's for temporary kernel mappings */
+       FIX_KMAP_BEGIN,
+       FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+#endif
+       __end_of_fixed_addresses
+};
+
+#define FIXADDR_TOP     (CONSISTENT_START - PAGE_SIZE)
+#define FIXADDR_SIZE   (__end_of_fixed_addresses << PAGE_SHIFT)
+#define FIXADDR_START  ((FIXADDR_TOP - FIXADDR_SIZE) & PMD_MASK)
+
+#define __fix_to_virt(x)       (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x)       ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+extern void __this_fixmap_does_not_exist(void);
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-dereference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static inline unsigned long fix_to_virt(const unsigned int idx)
+{
+       /*
+        * this branch gets completely eliminated after inlining,
+        * except when someone tries to use fixaddr indices in an
+        * illegal way. (such as mixing up address types or using
+        * out-of-range indices).
+        *
+        * If it doesn't get removed, the linker will complain
+        * loudly with a reasonably clear error message.
+        */
+       if (idx >= __end_of_fixed_addresses)
+               __this_fixmap_does_not_exist();
+
+       return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+       BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+       return __virt_to_fix(vaddr);
+}
+
+#define kmap_get_fixmap_pte(vaddr) \
+       pte_offset_kernel( \
+               pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), \
+               (vaddr) \
+       )
+
+/*
+ * Called from pgtable_init()
+ */
+extern void fixrange_init(unsigned long start, unsigned long end,
+       pgd_t *pgd_base);
+
+
+#endif
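
To make the index-to-address translation above concrete: fixmap slots count down one page at a time from FIXADDR_TOP, so an index and its virtual address round-trip through the two helpers. A minimal sketch, assuming CONFIG_HIGHMEM so that FIX_KMAP_BEGIN exists:

	/* Sketch: fixmap indices count down from FIXADDR_TOP, one page per slot. */
	static void fixmap_round_trip_example(void)
	{
		unsigned long vaddr = fix_to_virt(FIX_KMAP_BEGIN);

		BUG_ON(virt_to_fix(vaddr) != FIX_KMAP_BEGIN);		/* index <-> address round trip */
		BUG_ON(fix_to_virt(FIX_KMAP_BEGIN + 1) != vaddr - PAGE_SIZE); /* next slot is one page lower */
	}
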
diff --git a/arch/metag/include/asm/ftrace.h b/arch/metag/include/asm/ftrace.h
new file mode 100644 (file)
index 0000000..2901f0f
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef _ASM_METAG_FTRACE
+#define _ASM_METAG_FTRACE
+
+#ifdef CONFIG_FUNCTION_TRACER
+#define MCOUNT_INSN_SIZE       8 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void mcount_wrapper(void);
+#define MCOUNT_ADDR            ((long)(mcount_wrapper))
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+       return addr;
+}
+
+struct dyn_arch_ftrace {
+       /* No extra data needed on metag */
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_METAG_FTRACE */
diff --git a/arch/metag/include/asm/global_lock.h b/arch/metag/include/asm/global_lock.h
new file mode 100644 (file)
index 0000000..fc831c8
--- /dev/null
@@ -0,0 +1,100 @@
+#ifndef __ASM_METAG_GLOBAL_LOCK_H
+#define __ASM_METAG_GLOBAL_LOCK_H
+
+#include <asm/metag_mem.h>
+
+/**
+ * __global_lock1() - Acquire global voluntary lock (LOCK1).
+ * @flags:     Variable to store flags into.
+ *
+ * Acquires the Meta global voluntary lock (LOCK1), also taking care to disable
+ * all triggers so we cannot be interrupted, and to enforce a compiler barrier
+ * so that the compiler cannot reorder memory accesses across the lock.
+ *
+ * No other hardware thread will be able to acquire the voluntary or exclusive
+ * locks until the voluntary lock is released with @__global_unlock1, but they
+ * may continue to execute as long as they aren't trying to acquire either of
+ * the locks.
+ */
+#define __global_lock1(flags) do {                                     \
+       unsigned int __trval;                                           \
+       asm volatile("MOV       %0,#0\n\t"                              \
+                    "SWAP      %0,TXMASKI\n\t"                         \
+                    "LOCK1"                                            \
+                    : "=r" (__trval)                                   \
+                    :                                                  \
+                    : "memory");                                       \
+       (flags) = __trval;                                              \
+} while (0)
+
+/**
+ * __global_unlock1() - Release global voluntary lock (LOCK1).
+ * @flags:     Variable to restore flags from.
+ *
+ * Releases the Meta global voluntary lock (LOCK1) acquired with
+ * @__global_lock1, also taking care to re-enable triggers, and to enforce a
+ * compiler barrier so that the compiler cannot reorder memory accesses across
+ * the unlock.
+ *
+ * This immediately allows another hardware thread to acquire the voluntary or
+ * exclusive locks.
+ */
+#define __global_unlock1(flags) do {                                   \
+       unsigned int __trval = (flags);                                 \
+       asm volatile("LOCK0\n\t"                                        \
+                    "MOV       TXMASKI,%0"                             \
+                    :                                                  \
+                    : "r" (__trval)                                    \
+                    : "memory");                                       \
+} while (0)
+
+/**
+ * __global_lock2() - Acquire global exclusive lock (LOCK2).
+ * @flags:     Variable to store flags into.
+ *
+ * Acquires the Meta global voluntary lock and global exclusive lock (LOCK2),
+ * also taking care to disable all triggers so we cannot be interrupted, to take
+ * the atomic lock (system event) and to enforce a compiler barrier so that the
+ * compiler cannot reorder memory accesses across the lock.
+ *
+ * No other hardware thread will be able to execute code until the locks are
+ * released with @__global_unlock2.
+ */
+#define __global_lock2(flags) do {                                     \
+       unsigned int __trval;                                           \
+       unsigned int __aloc_hi = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \
+       asm volatile("MOV       %0,#0\n\t"                              \
+                    "SWAP      %0,TXMASKI\n\t"                         \
+                    "LOCK2\n\t"                                        \
+                    "SETD      [%1+#0x40],D1RtP"                       \
+                    : "=r&" (__trval)                                  \
+                    : "u" (__aloc_hi)                                  \
+                    : "memory");                                       \
+       (flags) = __trval;                                              \
+} while (0)
+
+/**
+ * __global_unlock2() - Release global exclusive lock (LOCK2).
+ * @flags:     Variable to restore flags from.
+ *
+ * Releases the Meta global exclusive lock (LOCK2) and global voluntary lock
+ * acquired with @__global_lock2, also taking care to release the atomic lock
+ * (system event), re-enable triggers, and to enforce a compiler barrier so that
+ * the compiler cannot reorder memory accesses across the unlock.
+ *
+ * This immediately allows other hardware threads to continue executing and one
+ * of them to acquire locks.
+ */
+#define __global_unlock2(flags) do {                                   \
+       unsigned int __trval = (flags);                                 \
+       unsigned int __alock_hi = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000; \
+       asm volatile("SETD      [%1+#0x00],D1RtP\n\t"                   \
+                    "LOCK0\n\t"                                        \
+                    "MOV       TXMASKI,%0"                             \
+                    :                                                  \
+                    : "r" (__trval),                                   \
+                      "u" (__alock_hi)                                 \
+                    : "memory");                                       \
+} while (0)
+
+#endif /* __ASM_METAG_GLOBAL_LOCK_H */
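
Usage mirrors the local_irq_save()/restore() idiom: the flags word handed out by the lock macro (the saved TXMASKI trigger mask) must be passed back to the matching unlock. A minimal sketch of a LOCK1 critical section; the shared counter is purely illustrative:

	static unsigned int shared_counter;	/* hypothetical state shared across hw threads */

	static void bump_shared_counter(void)
	{
		unsigned long flags;

		__global_lock1(flags);		/* triggers masked, voluntary lock held */
		shared_counter++;		/* no other hw thread can take LOCK1/LOCK2 here */
		__global_unlock1(flags);	/* lock dropped, trigger mask restored */
	}
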
diff --git a/arch/metag/include/asm/gpio.h b/arch/metag/include/asm/gpio.h
new file mode 100644 (file)
index 0000000..b3799d8
--- /dev/null
@@ -0,0 +1,4 @@
+#ifndef __LINUX_GPIO_H
+#warning Include linux/gpio.h instead of asm/gpio.h
+#include <linux/gpio.h>
+#endif
diff --git a/arch/metag/include/asm/highmem.h b/arch/metag/include/asm/highmem.h
new file mode 100644 (file)
index 0000000..6646a15
--- /dev/null
@@ -0,0 +1,62 @@
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#include <asm/cacheflush.h>
+#include <asm/kmap_types.h>
+#include <asm/fixmap.h>
+
+/*
+ * Right now we initialize only a single pte table. It can be extended
+ * easily; subsequent pte tables have to be allocated in one physical
+ * chunk of RAM.
+ */
+/*
+ * Ordering is (from lower to higher memory addresses):
+ *
+ * high_memory
+ *                     Persistent kmap area
+ * PKMAP_BASE
+ *                     fixed_addresses
+ * FIXADDR_START
+ * FIXADDR_TOP
+ *                     Vmalloc area
+ * VMALLOC_START
+ * VMALLOC_END
+ */
+#define PKMAP_BASE             (FIXADDR_START - PMD_SIZE)
+#define LAST_PKMAP             PTRS_PER_PTE
+#define LAST_PKMAP_MASK                (LAST_PKMAP - 1)
+#define PKMAP_NR(virt)         (((virt) - PKMAP_BASE) >> PAGE_SHIFT)
+#define PKMAP_ADDR(nr)         (PKMAP_BASE + ((nr) << PAGE_SHIFT))
+
+#define kmap_prot              PAGE_KERNEL
+
+static inline void flush_cache_kmaps(void)
+{
+       flush_cache_all();
+}
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *pkmap_page_table;
+
+extern void *kmap_high(struct page *page);
+extern void kunmap_high(struct page *page);
+
+extern void kmap_init(void);
+
+/*
+ * The following functions are already defined by <linux/highmem.h>
+ * when CONFIG_HIGHMEM is not set.
+ */
+#ifdef CONFIG_HIGHMEM
+extern void *kmap(struct page *page);
+extern void kunmap(struct page *page);
+extern void *kmap_atomic(struct page *page);
+extern void __kunmap_atomic(void *kvaddr);
+extern void *kmap_atomic_pfn(unsigned long pfn);
+extern struct page *kmap_atomic_to_page(void *ptr);
+#endif
+
+#endif
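
The persistent-kmap window described above sits one PMD directly below the fixmap area, and PKMAP_NR()/PKMAP_ADDR() convert between a pkmap slot number and its virtual address. A small sketch of the round trip:

	/* Sketch: pkmap slot <-> virtual address conversions. */
	static void pkmap_round_trip_example(void)
	{
		unsigned long addr = PKMAP_ADDR(1);	/* second slot: PKMAP_BASE + PAGE_SIZE */

		BUG_ON(PKMAP_NR(addr) != 1);		/* and back to the slot number */
	}
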
diff --git a/arch/metag/include/asm/hugetlb.h b/arch/metag/include/asm/hugetlb.h
new file mode 100644 (file)
index 0000000..f545477
--- /dev/null
@@ -0,0 +1,86 @@
+#ifndef _ASM_METAG_HUGETLB_H
+#define _ASM_METAG_HUGETLB_H
+
+#include <asm/page.h>
+
+
+static inline int is_hugepage_only_range(struct mm_struct *mm,
+                                        unsigned long addr,
+                                        unsigned long len) {
+       return 0;
+}
+
+int prepare_hugepage_range(struct file *file, unsigned long addr,
+                                               unsigned long len);
+
+static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
+{
+}
+
+static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
+                                         unsigned long addr, unsigned long end,
+                                         unsigned long floor,
+                                         unsigned long ceiling)
+{
+       free_pgd_range(tlb, addr, end, floor, ceiling);
+}
+
+static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+                                  pte_t *ptep, pte_t pte)
+{
+       set_pte_at(mm, addr, ptep, pte);
+}
+
+static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+                                           unsigned long addr, pte_t *ptep)
+{
+       return ptep_get_and_clear(mm, addr, ptep);
+}
+
+static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
+                                        unsigned long addr, pte_t *ptep)
+{
+}
+
+static inline int huge_pte_none(pte_t pte)
+{
+       return pte_none(pte);
+}
+
+static inline pte_t huge_pte_wrprotect(pte_t pte)
+{
+       return pte_wrprotect(pte);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+                                          unsigned long addr, pte_t *ptep)
+{
+       ptep_set_wrprotect(mm, addr, ptep);
+}
+
+static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+                                            unsigned long addr, pte_t *ptep,
+                                            pte_t pte, int dirty)
+{
+       return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+}
+
+static inline pte_t huge_ptep_get(pte_t *ptep)
+{
+       return *ptep;
+}
+
+static inline int arch_prepare_hugepage(struct page *page)
+{
+       return 0;
+}
+
+static inline void arch_release_hugepage(struct page *page)
+{
+}
+
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
+#endif /* _ASM_METAG_HUGETLB_H */
diff --git a/arch/metag/include/asm/hwthread.h b/arch/metag/include/asm/hwthread.h
new file mode 100644 (file)
index 0000000..8f97866
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies
+ */
+#ifndef __METAG_HWTHREAD_H
+#define __METAG_HWTHREAD_H
+
+#include <linux/bug.h>
+#include <linux/io.h>
+
+#include <asm/metag_mem.h>
+
+#define BAD_HWTHREAD_ID                (0xFFU)
+#define BAD_CPU_ID             (0xFFU)
+
+extern u8 cpu_2_hwthread_id[];
+extern u8 hwthread_id_2_cpu[];
+
+/*
+ * Each hardware thread's Control Unit registers are memory-mapped
+ * and can therefore be accessed by any other hardware thread.
+ *
+ * This helper function returns the memory address where "thread"'s
+ * register "regnum" is mapped.
+ */
+static inline
+void __iomem *__CU_addr(unsigned int thread, unsigned int regnum)
+{
+       unsigned int base, thread_offset, thread_regnum;
+
+       WARN_ON(thread == BAD_HWTHREAD_ID);
+
+       base = T0UCTREG0;       /* Control unit base */
+
+       thread_offset = TnUCTRX_STRIDE * thread;
+       thread_regnum = TXUCTREGn_STRIDE * regnum;
+
+       return (void __iomem *)(base + thread_offset + thread_regnum);
+}
+
+#endif /* __METAG_HWTHREAD_H */
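
For instance, reading a control-unit register belonging to another hardware thread just means dereferencing the address __CU_addr() returns with the normal MMIO accessors. A sketch (the helper name is invented; the register number is whatever CU register the caller is after):

	/* Sketch: read control-unit register 'regnum' of hardware thread 'thread'. */
	static u32 peek_thread_cu_reg(unsigned int thread, unsigned int regnum)
	{
		void __iomem *reg = __CU_addr(thread, regnum);

		return readl(reg);
	}
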
diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h
new file mode 100644 (file)
index 0000000..9359e50
--- /dev/null
@@ -0,0 +1,165 @@
+#ifndef _ASM_METAG_IO_H
+#define _ASM_METAG_IO_H
+
+#include <linux/types.h>
+
+#define IO_SPACE_LIMIT  0
+
+#define page_to_bus page_to_phys
+#define bus_to_page phys_to_page
+
+/*
+ * Generic I/O
+ */
+
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+       u8 ret;
+       asm volatile("GETB %0,[%1]"
+                    : "=da" (ret)
+                    : "da" (addr)
+                    : "memory");
+       return ret;
+}
+
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+       u16 ret;
+       asm volatile("GETW %0,[%1]"
+                    : "=da" (ret)
+                    : "da" (addr)
+                    : "memory");
+       return ret;
+}
+
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+       u32 ret;
+       asm volatile("GETD %0,[%1]"
+                    : "=da" (ret)
+                    : "da" (addr)
+                    : "memory");
+       return ret;
+}
+
+#define __raw_readq __raw_readq
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+       u64 ret;
+       asm volatile("GETL %0,%t0,[%1]"
+                    : "=da" (ret)
+                    : "da" (addr)
+                    : "memory");
+       return ret;
+}
+
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 b, volatile void __iomem *addr)
+{
+       asm volatile("SETB [%0],%1"
+                    :
+                    : "da" (addr),
+                      "da" (b)
+                    : "memory");
+}
+
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 b, volatile void __iomem *addr)
+{
+       asm volatile("SETW [%0],%1"
+                    :
+                    : "da" (addr),
+                      "da" (b)
+                    : "memory");
+}
+
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 b, volatile void __iomem *addr)
+{
+       asm volatile("SETD [%0],%1"
+                    :
+                    : "da" (addr),
+                      "da" (b)
+                    : "memory");
+}
+
+#define __raw_writeq __raw_writeq
+static inline void __raw_writeq(u64 b, volatile void __iomem *addr)
+{
+       asm volatile("SETL [%0],%1,%t1"
+                    :
+                    : "da" (addr),
+                      "da" (b)
+                    : "memory");
+}
+
+/*
+ * The generic io.h can define all the other generic accessors
+ */
+
+#include <asm-generic/io.h>
+
+/*
+ * Despite being a 32bit architecture, Meta can do 64bit memory accesses
+ * (assuming the bus supports it).
+ */
+
+#define readq  __raw_readq
+#define writeq __raw_writeq
+
+/*
+ * Meta specific I/O for accessing non-MMU areas.
+ *
+ * These can be provided with a physical address rather than an __iomem pointer
+ * and should only be used by core architecture code for accessing fixed core
+ * registers. Generic drivers should use ioremap and the generic I/O accessors.
+ */
+
+#define metag_in8(addr)                __raw_readb((volatile void __iomem *)(addr))
+#define metag_in16(addr)       __raw_readw((volatile void __iomem *)(addr))
+#define metag_in32(addr)       __raw_readl((volatile void __iomem *)(addr))
+#define metag_in64(addr)       __raw_readq((volatile void __iomem *)(addr))
+
+#define metag_out8(b, addr)    __raw_writeb(b, (volatile void __iomem *)(addr))
+#define metag_out16(b, addr)   __raw_writew(b, (volatile void __iomem *)(addr))
+#define metag_out32(b, addr)   __raw_writel(b, (volatile void __iomem *)(addr))
+#define metag_out64(b, addr)   __raw_writeq(b, (volatile void __iomem *)(addr))
+
+/*
+ * io remapping functions
+ */
+
+extern void __iomem *__ioremap(unsigned long offset,
+                              size_t size, unsigned long flags);
+extern void __iounmap(void __iomem *addr);
+
+/**
+ *     ioremap         -       map bus memory into CPU space
+ *     @offset:        bus address of the memory
+ *     @size:          size of the resource to map
+ *
+ *     ioremap performs a platform specific sequence of operations to
+ *     make bus memory CPU accessible via the readb/readw/readl/writeb/
+ *     writew/writel functions and the other mmio helpers. The returned
+ *     address is not guaranteed to be usable directly as a virtual
+ *     address.
+ */
+#define ioremap(offset, size)                   \
+       __ioremap((offset), (size), 0)
+
+#define ioremap_nocache(offset, size)           \
+       __ioremap((offset), (size), 0)
+
+#define ioremap_cached(offset, size)            \
+       __ioremap((offset), (size), _PAGE_CACHEABLE)
+
+#define ioremap_wc(offset, size)                \
+       __ioremap((offset), (size), _PAGE_WR_COMBINE)
+
+#define iounmap(addr)                           \
+       __iounmap(addr)
+
+#endif  /* _ASM_METAG_IO_H */
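
To illustrate the distinction drawn above: core code may hit a fixed, always-mapped core register straight through its physical address with metag_out32()/metag_in32(), whereas a driver should ioremap() its region and use the generic accessors. A hedged sketch, using the write-combiner flush event address from asm/metag_mem.h as the example register (the value written is assumed not to matter; the write itself raises the event):

	#include <asm/metag_mem.h>

	/* Sketch: core code poking a fixed system-event register by physical address. */
	static void drain_write_combiner_example(void)
	{
		metag_out32(0, LINSYSEVENT_WR_COMBINE_FLUSH);
	}
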
diff --git a/arch/metag/include/asm/irq.h b/arch/metag/include/asm/irq.h
new file mode 100644 (file)
index 0000000..be0c8f3
--- /dev/null
@@ -0,0 +1,32 @@
+#ifndef __ASM_METAG_IRQ_H
+#define __ASM_METAG_IRQ_H
+
+#ifdef CONFIG_4KSTACKS
+extern void irq_ctx_init(int cpu);
+extern void irq_ctx_exit(int cpu);
+# define __ARCH_HAS_DO_SOFTIRQ
+#else
+# define irq_ctx_init(cpu) do { } while (0)
+# define irq_ctx_exit(cpu) do { } while (0)
+#endif
+
+void tbi_startup_interrupt(int);
+void tbi_shutdown_interrupt(int);
+
+struct pt_regs;
+
+int tbisig_map(unsigned int hw);
+extern void do_IRQ(int irq, struct pt_regs *regs);
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+int traps_save_context(void);
+int traps_restore_context(void);
+#endif
+
+#include <asm-generic/irq.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void migrate_irqs(void);
+#endif
+
+#endif /* __ASM_METAG_IRQ_H */
diff --git a/arch/metag/include/asm/irqflags.h b/arch/metag/include/asm/irqflags.h
new file mode 100644 (file)
index 0000000..339b16f
--- /dev/null
@@ -0,0 +1,93 @@
+/*
+ * IRQ flags handling
+ *
+ * This file gets included from lowlevel asm headers too, to provide
+ * wrapped versions of the local_irq_*() APIs, based on the
+ * raw_local_irq_*() functions from the lowlevel headers.
+ */
+#ifndef _ASM_IRQFLAGS_H
+#define _ASM_IRQFLAGS_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/core_reg.h>
+#include <asm/metag_regs.h>
+
+#define INTS_OFF_MASK TXSTATI_BGNDHALT_BIT
+
+#ifdef CONFIG_SMP
+extern unsigned int get_trigger_mask(void);
+#else
+
+extern unsigned int global_trigger_mask;
+
+static inline unsigned int get_trigger_mask(void)
+{
+       return global_trigger_mask;
+}
+#endif
+
+static inline unsigned long arch_local_save_flags(void)
+{
+       return __core_reg_get(TXMASKI);
+}
+
+static inline int arch_irqs_disabled_flags(unsigned long flags)
+{
+       return (flags & ~INTS_OFF_MASK) == 0;
+}
+
+static inline int arch_irqs_disabled(void)
+{
+       unsigned long flags = arch_local_save_flags();
+
+       return arch_irqs_disabled_flags(flags);
+}
+
+static inline unsigned long __irqs_disabled(void)
+{
+       /*
+        * We shouldn't enable exceptions if they are not already
+        * enabled. This is required for chancalls to work correctly.
+        */
+       return arch_local_save_flags() & INTS_OFF_MASK;
+}
+
+/*
+ * For spinlocks, etc:
+ */
+static inline unsigned long arch_local_irq_save(void)
+{
+       unsigned long flags = __irqs_disabled();
+
+       asm volatile("SWAP %0,TXMASKI\n" : "=r" (flags) : "0" (flags)
+                    : "memory");
+
+       return flags;
+}
+
+static inline void arch_local_irq_restore(unsigned long flags)
+{
+       asm volatile("MOV TXMASKI,%0\n" : : "r" (flags) : "memory");
+}
+
+static inline void arch_local_irq_disable(void)
+{
+       unsigned long flags = __irqs_disabled();
+
+       asm volatile("MOV TXMASKI,%0\n" : : "r" (flags) : "memory");
+}
+
+#ifdef CONFIG_SMP
+/* Avoid circular include dependencies through <linux/preempt.h> */
+void arch_local_irq_enable(void);
+#else
+static inline void arch_local_irq_enable(void)
+{
+       arch_local_irq_restore(get_trigger_mask());
+}
+#endif
+
+#endif /* (__ASSEMBLY__) */
+
+#endif /* !(_ASM_IRQFLAGS_H) */
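
These are the arch backends for the usual local_irq_save()/local_irq_restore() pattern; the saved flags are simply the TXMASKI trigger mask of the current hardware thread. A minimal usage sketch:

	/* Sketch: a short section that must not be interrupted on this hw thread. */
	static void touch_per_thread_state(void)
	{
		unsigned long flags;

		flags = arch_local_irq_save();	/* triggers masked, old TXMASKI returned */
		/* ... update state that an interrupt handler might also touch ... */
		arch_local_irq_restore(flags);	/* previous trigger mask restored */
	}
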
diff --git a/arch/metag/include/asm/l2cache.h b/arch/metag/include/asm/l2cache.h
new file mode 100644 (file)
index 0000000..bffbeaa
--- /dev/null
@@ -0,0 +1,258 @@
+#ifndef _METAG_L2CACHE_H
+#define _METAG_L2CACHE_H
+
+#ifdef CONFIG_METAG_L2C
+
+#include <asm/global_lock.h>
+#include <asm/io.h>
+
+/*
+ * Store the last known value of pfenable (we don't want prefetch enabled while
+ * L2 is off).
+ */
+extern int l2c_pfenable;
+
+/* defined in arch/metag/drivers/core-sysfs.c */
+extern struct sysdev_class cache_sysclass;
+
+static inline void wr_fence(void);
+
+/*
+ * Functions for reading of L2 cache configuration.
+ */
+
+/* Get raw L2 config register (CORE_CONFIG3) */
+static inline unsigned int meta_l2c_config(void)
+{
+       const unsigned int *corecfg3 = (const unsigned int *)METAC_CORE_CONFIG3;
+       return *corecfg3;
+}
+
+/* Get whether the L2 is present */
+static inline int meta_l2c_is_present(void)
+{
+       return meta_l2c_config() & METAC_CORECFG3_L2C_HAVE_L2C_BIT;
+}
+
+/* Get whether the L2 is configured for write-back instead of write-through */
+static inline int meta_l2c_is_writeback(void)
+{
+       return meta_l2c_config() & METAC_CORECFG3_L2C_MODE_BIT;
+}
+
+/* Get whether the L2 is unified instead of separated code/data */
+static inline int meta_l2c_is_unified(void)
+{
+       return meta_l2c_config() & METAC_CORECFG3_L2C_UNIFIED_BIT;
+}
+
+/* Get the L2 cache size in bytes */
+static inline unsigned int meta_l2c_size(void)
+{
+       unsigned int size_s;
+       if (!meta_l2c_is_present())
+               return 0;
+       size_s = (meta_l2c_config() & METAC_CORECFG3_L2C_SIZE_BITS)
+                       >> METAC_CORECFG3_L2C_SIZE_S;
+       /* L2CSIZE is in KiB */
+       return 1024 << size_s;
+}
+
+/* Get the number of ways in the L2 cache */
+static inline unsigned int meta_l2c_ways(void)
+{
+       unsigned int ways_s;
+       if (!meta_l2c_is_present())
+               return 0;
+       ways_s = (meta_l2c_config() & METAC_CORECFG3_L2C_NUM_WAYS_BITS)
+                       >> METAC_CORECFG3_L2C_NUM_WAYS_S;
+       return 0x1 << ways_s;
+}
+
+/* Get the line size of the L2 cache */
+static inline unsigned int meta_l2c_linesize(void)
+{
+       unsigned int line_size;
+       if (!meta_l2c_is_present())
+               return 0;
+       line_size = (meta_l2c_config() & METAC_CORECFG3_L2C_LINE_SIZE_BITS)
+                       >> METAC_CORECFG3_L2C_LINE_SIZE_S;
+       switch (line_size) {
+       case METAC_CORECFG3_L2C_LINE_SIZE_64B:
+               return 64;
+       default:
+               return 0;
+       }
+}
+
+/* Get the revision ID of the L2 cache */
+static inline unsigned int meta_l2c_revision(void)
+{
+       return (meta_l2c_config() & METAC_CORECFG3_L2C_REV_ID_BITS)
+                       >> METAC_CORECFG3_L2C_REV_ID_S;
+}
+
+
+/*
+ * Start an initialisation of the L2 cachelines and wait for completion.
+ * This should only be done in a LOCK1 or LOCK2 critical section while the L2
+ * is disabled.
+ */
+static inline void _meta_l2c_init(void)
+{
+       metag_out32(SYSC_L2C_INIT_INIT, SYSC_L2C_INIT);
+       while (metag_in32(SYSC_L2C_INIT) == SYSC_L2C_INIT_IN_PROGRESS)
+               /* do nothing */;
+}
+
+/*
+ * Start a writeback of dirty L2 cachelines and wait for completion.
+ * This should only be done in a LOCK1 or LOCK2 critical section.
+ */
+static inline void _meta_l2c_purge(void)
+{
+       metag_out32(SYSC_L2C_PURGE_PURGE, SYSC_L2C_PURGE);
+       while (metag_in32(SYSC_L2C_PURGE) == SYSC_L2C_PURGE_IN_PROGRESS)
+               /* do nothing */;
+}
+
+/* Set whether the L2 cache is enabled. */
+static inline void _meta_l2c_enable(int enabled)
+{
+       unsigned int enable;
+
+       enable = metag_in32(SYSC_L2C_ENABLE);
+       if (enabled)
+               enable |= SYSC_L2C_ENABLE_ENABLE_BIT;
+       else
+               enable &= ~SYSC_L2C_ENABLE_ENABLE_BIT;
+       metag_out32(enable, SYSC_L2C_ENABLE);
+}
+
+/* Set whether the L2 cache prefetch is enabled. */
+static inline void _meta_l2c_pf_enable(int pfenabled)
+{
+       unsigned int enable;
+
+       enable = metag_in32(SYSC_L2C_ENABLE);
+       if (pfenabled)
+               enable |= SYSC_L2C_ENABLE_PFENABLE_BIT;
+       else
+               enable &= ~SYSC_L2C_ENABLE_PFENABLE_BIT;
+       metag_out32(enable, SYSC_L2C_ENABLE);
+}
+
+/* Return whether the L2 cache is enabled */
+static inline int _meta_l2c_is_enabled(void)
+{
+       return metag_in32(SYSC_L2C_ENABLE) & SYSC_L2C_ENABLE_ENABLE_BIT;
+}
+
+/* Return whether the L2 cache prefetch is enabled */
+static inline int _meta_l2c_pf_is_enabled(void)
+{
+       return metag_in32(SYSC_L2C_ENABLE) & SYSC_L2C_ENABLE_PFENABLE_BIT;
+}
+
+
+/* Return whether the L2 cache is enabled */
+static inline int meta_l2c_is_enabled(void)
+{
+       int en;
+
+       /*
+        * There is no need to lock at the moment, as the enable bit is never
+        * intermediately changed, so we will never see an intermediate result.
+        */
+       en = _meta_l2c_is_enabled();
+
+       return en;
+}
+
+/*
+ * Ensure the L2 cache is disabled.
+ * Return whether the L2 was previously disabled.
+ */
+int meta_l2c_disable(void);
+
+/*
+ * Ensure the L2 cache is enabled.
+ * Return whether the L2 was previously enabled.
+ */
+int meta_l2c_enable(void);
+
+/* Return whether the L2 cache prefetch is enabled */
+static inline int meta_l2c_pf_is_enabled(void)
+{
+       return l2c_pfenable;
+}
+
+/*
+ * Set whether the L2 cache prefetch is enabled.
+ * Return whether the L2 prefetch was previously enabled.
+ */
+int meta_l2c_pf_enable(int pfenable);
+
+/*
+ * Flush the L2 cache.
+ * Return 1 if the L2 is disabled.
+ */
+int meta_l2c_flush(void);
+
+/*
+ * Write back all dirty cache lines in the L2 cache.
+ * Return 1 if the L2 is disabled or there isn't any writeback.
+ */
+static inline int meta_l2c_writeback(void)
+{
+       unsigned long flags;
+       int en;
+
+       /* no need to purge if it's not a writeback cache */
+       if (!meta_l2c_is_writeback())
+               return 1;
+
+       /*
+        * Purge only works if the L2 is enabled, and involves reading back to
+        * detect completion, so keep this operation atomic with other threads.
+        */
+       __global_lock1(flags);
+       en = meta_l2c_is_enabled();
+       if (likely(en)) {
+               wr_fence();
+               _meta_l2c_purge();
+       }
+       __global_unlock1(flags);
+
+       return !en;
+}
+
+#else /* CONFIG_METAG_L2C */
+
+#define meta_l2c_config()              0
+#define meta_l2c_is_present()          0
+#define meta_l2c_is_writeback()                0
+#define meta_l2c_is_unified()          0
+#define meta_l2c_size()                        0
+#define meta_l2c_ways()                        0
+#define meta_l2c_linesize()            0
+#define meta_l2c_revision()            0
+
+#define meta_l2c_is_enabled()          0
+#define _meta_l2c_pf_is_enabled()      0
+#define meta_l2c_pf_is_enabled()       0
+#define meta_l2c_disable()             1
+#define meta_l2c_enable()              0
+#define meta_l2c_pf_enable(X)          0
+static inline int meta_l2c_flush(void)
+{
+       return 1;
+}
+static inline int meta_l2c_writeback(void)
+{
+       return 1;
+}
+
+#endif /* CONFIG_METAG_L2C */
+
+#endif /* _METAG_L2CACHE_H */
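
As the comments above spell out, a purge is only meaningful on a write-back L2 and has to be serialised against the other hardware threads, which the wrapper already handles. A sketch of writing dirty lines back before handing a buffer to a non-coherent bus master (the DMA hand-off itself is hypothetical):

	/* Sketch: push dirty L2 lines out before a non-coherent agent reads memory. */
	static void l2_handoff_example(void)
	{
		if (meta_l2c_writeback())
			return;	/* L2 absent, disabled or write-through: nothing to flush */

		/* dirty lines are now in memory; safe to start the (hypothetical) DMA */
	}
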
diff --git a/arch/metag/include/asm/linkage.h b/arch/metag/include/asm/linkage.h
new file mode 100644 (file)
index 0000000..73bf25b
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef __ASM_LINKAGE_H
+#define __ASM_LINKAGE_H
+
+#define __ALIGN .p2align 2
+#define __ALIGN_STR ".p2align 2"
+
+#endif
diff --git a/arch/metag/include/asm/mach/arch.h b/arch/metag/include/asm/mach/arch.h
new file mode 100644 (file)
index 0000000..12c5664
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * arch/metag/include/asm/mach/arch.h
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * based on the ARM version:
+ *  Copyright (C) 2000 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _METAG_MACH_ARCH_H_
+#define _METAG_MACH_ARCH_H_
+
+#include <linux/stddef.h>
+
+#include <asm/clock.h>
+
+/**
+ * struct machine_desc - Describes a board controlled by a Meta.
+ * @name:              Board/SoC name.
+ * @dt_compat:         Array of device tree 'compatible' strings.
+ * @clocks:            Clock callbacks.
+ *
+ * @nr_irqs:           Maximum number of IRQs.
+ *                     If 0, defaults to NR_IRQS in asm-generic/irq.h.
+ *
+ * @init_early:                Early init callback.
+ * @init_irq:          IRQ init callback for setting up IRQ controllers.
+ * @init_machine:      Arch init callback for setting up devices.
+ * @init_late:         Late init callback.
+ *
+ * This structure is provided by each board which can be controlled by a Meta.
+ * It is chosen by matching the compatible strings in the device tree provided
+ * by the bootloader with the strings in @dt_compat, and sets up any aspects of
+ * the machine that aren't configured with device tree (yet).
+ */
+struct machine_desc {
+       const char              *name;
+       const char              **dt_compat;
+       struct meta_clock_desc  *clocks;
+
+       unsigned int            nr_irqs;
+
+       void                    (*init_early)(void);
+       void                    (*init_irq)(void);
+       void                    (*init_machine)(void);
+       void                    (*init_late)(void);
+};
+
+/*
+ * Current machine - only accessible during boot.
+ */
+extern struct machine_desc *machine_desc;
+
+/*
+ * Machine type table - also only accessible during boot
+ */
+extern struct machine_desc __arch_info_begin[], __arch_info_end[];
+#define for_each_machine_desc(p)                       \
+       for (p = __arch_info_begin; p < __arch_info_end; p++)
+
+static inline struct machine_desc *default_machine_desc(void)
+{
+       /* the default machine is the last one linked in */
+       if (__arch_info_end - 1 < __arch_info_begin)
+               return NULL;
+       return __arch_info_end - 1;
+}
+
+/*
+ * Set of macros to define architecture features.  This is built into
+ * a table by the linker.
+ */
+#define MACHINE_START(_type, _name)                    \
+static const struct machine_desc __mach_desc_##_type   \
+__used                                                 \
+__attribute__((__section__(".arch.info.init"))) = {    \
+       .name           = _name,
+
+#define MACHINE_END                            \
+};
+
+#endif /* _METAG_MACH_ARCH_H_ */
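
A board port provides exactly one machine_desc through the MACHINE_START/MACHINE_END pair; the kernel picks it by matching the root 'compatible' strings in the bootloader's device tree against @dt_compat. A hedged sketch of a minimal board file, with every name invented for illustration:

	/* Sketch only: board name, compatible string and callback are hypothetical. */
	static const char *example_board_dt_compat[] = {
		"example,example-board",
		NULL,
	};

	static void __init example_board_init(void)
	{
		/* register board devices not (yet) described by device tree */
	}

	MACHINE_START(EXAMPLE_BOARD, "Example Board")
		.dt_compat	= example_board_dt_compat,
		.init_machine	= example_board_init,
	MACHINE_END
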
diff --git a/arch/metag/include/asm/metag_isa.h b/arch/metag/include/asm/metag_isa.h
new file mode 100644 (file)
index 0000000..c8aa2ae
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * asm/metag_isa.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta instruction set.
+ */
+
+#ifndef _ASM_METAG_ISA_H_
+#define _ASM_METAG_ISA_H_
+
+
+/* L1 cache layout */
+
+/* Data cache line size as bytes and shift */
+#define DCACHE_LINE_BYTES 64
+#define DCACHE_LINE_S     6
+
+/* Number of ways in the data cache */
+#define DCACHE_WAYS       4
+
+/* Instruction cache line size as bytes and shift */
+#define ICACHE_LINE_BYTES 64
+#define ICACHE_LINE_S     6
+
+/* Number of ways in the instruction cache */
+#define ICACHE_WAYS       4
+
+
+/*
+ * CACHEWD/CACHEWL instructions use the bottom 8 bits of the data presented to
+ * control the operation actually achieved.
+ */
+/* Use of these two bits should be discouraged since the bits don't have
+ * consistent meanings.
+ */
+#define CACHEW_ICACHE_BIT           0x01
+#define CACHEW_TLBFLUSH_BIT         0x02
+
+#define CACHEW_FLUSH_L1D_L2         0x0
+#define CACHEW_INVALIDATE_L1I       0x1
+#define CACHEW_INVALIDATE_L1DTLB    0x2
+#define CACHEW_INVALIDATE_L1ITLB    0x3
+#define CACHEW_WRITEBACK_L1D_L2     0x4
+#define CACHEW_INVALIDATE_L1D       0x8
+#define CACHEW_INVALIDATE_L1D_L2    0xC
+
+/*
+ * CACHERD/CACHERL instructions use bits 3:5 of the address presented to
+ * control the operation achieved and hence the specific result.
+ */
+#define CACHER_ADDR_BITS            0xFFFFFFC0
+#define CACHER_OPER_BITS            0x00000030
+#define CACHER_OPER_S               4
+#define     CACHER_OPER_LINPHY          0
+#define CACHER_ICACHE_BIT           0x00000008
+#define CACHER_ICACHE_S             3
+
+/*
+ * CACHERD/CACHERL LINPHY Oper result is one/two 32-bit words
+ *
+ *  If CRLINPHY0_VAL_BIT (Bit 0) is set then,
+ *      Lower 32 bits correspond to MMCU_ENTRY_* above.
+ *      Upper 32 bits correspond to CRLINPHY1_* values below (if requested).
+ *  else
+ *      Lower 32 bits correspond to CRLINPHY0_* values below.
+ *      Upper 32 bits are undefined.
+ */
+#define CRLINPHY0_VAL_BIT      0x00000001
+#define CRLINPHY0_FIRST_BIT    0x00000004 /* Set if VAL=0 due to first level */
+
+#define CRLINPHY1_READ_BIT     0x00000001 /* Set if reads permitted          */
+#define CRLINPHY1_SINGLE_BIT   0x00000004 /* Set if TLB does not cache entry */
+#define CRLINPHY1_PAGEMSK_BITS 0x0000FFF0 /* Set to ((2^n-1)>>12) value      */
+#define CRLINPHY1_PAGEMSK_S    4
+
+#endif /* _ASM_METAG_ISA_H_ */
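
The LINPHY result format above is decoded by checking the valid bit in the low word first; only then do the MMCU_ENTRY_* fields (low word, defined in asm/metag_mem.h) and the CRLINPHY1_* flags (high word) mean anything. A sketch of that decode, assuming the two 32-bit result words have already been fetched with a CACHERL lookup:

	/* Sketch: decode a CACHERL linear->physical lookup result. */
	static int decode_linphy(u32 lo, u32 hi, u32 *phys)
	{
		if (!(lo & CRLINPHY0_VAL_BIT))
			return -1;			/* no valid translation */

		*phys = lo & MMCU_ENTRY_ADDR_BITS;	/* low word uses the MMCU_ENTRY_* layout */
		return (hi & CRLINPHY1_READ_BIT) ? 1 : 0;	/* 1 if reads are permitted */
	}
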
diff --git a/arch/metag/include/asm/metag_mem.h b/arch/metag/include/asm/metag_mem.h
new file mode 100644 (file)
index 0000000..3f7b54d
--- /dev/null
@@ -0,0 +1,1106 @@
+/*
+ * asm/metag_mem.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta (memory-mapped) registers.
+ */
+
+#ifndef _ASM_METAG_MEM_H_
+#define _ASM_METAG_MEM_H_
+
+/*****************************************************************************
+ *                   META MEMORY MAP LINEAR ADDRESS VALUES
+ ****************************************************************************/
+/*
+ * COMMON MEMORY MAP
+ * -----------------
+ */
+
+#define LINSYSTEM_BASE  0x00200000
+#define LINSYSTEM_LIMIT 0x07FFFFFF
+
+/* Linear cache flush is now implemented via the DCACHE instruction. These defines
+   relate to a special region that used to exist for achieving cache flushes.
+ */
+#define         LINSYSLFLUSH_S 0
+
+#define     LINSYSRES0_BASE     0x00200000
+#define     LINSYSRES0_LIMIT    0x01FFFFFF
+
+#define     LINSYSCUSTOM_BASE 0x02000000
+#define     LINSYSCUSTOM_LIMIT   0x02FFFFFF
+
+#define     LINSYSEXPAND_BASE 0x03000000
+#define     LINSYSEXPAND_LIMIT   0x03FFFFFF
+
+#define     LINSYSEVENT_BASE  0x04000000
+#define         LINSYSEVENT_WR_ATOMIC_UNLOCK    0x04000000
+#define         LINSYSEVENT_WR_ATOMIC_LOCK      0x04000040
+#define         LINSYSEVENT_WR_CACHE_DISABLE    0x04000080
+#define         LINSYSEVENT_WR_CACHE_ENABLE     0x040000C0
+#define         LINSYSEVENT_WR_COMBINE_FLUSH    0x04000100
+#define         LINSYSEVENT_WR_FENCE            0x04000140
+#define     LINSYSEVENT_LIMIT   0x04000FFF
+
+#define     LINSYSCFLUSH_BASE   0x04400000
+#define         LINSYSCFLUSH_DCACHE_LINE    0x04400000
+#define         LINSYSCFLUSH_ICACHE_LINE    0x04500000
+#define         LINSYSCFLUSH_MMCU           0x04700000
+#ifndef METAC_1_2
+#define         LINSYSCFLUSH_TxMMCU_BASE    0x04700020
+#define         LINSYSCFLUSH_TxMMCU_STRIDE  0x00000008
+#endif
+#define         LINSYSCFLUSH_ADDR_BITS      0x000FFFFF
+#define         LINSYSCFLUSH_ADDR_S         0
+#define     LINSYSCFLUSH_LIMIT  0x047FFFFF
+
+#define     LINSYSCTRL_BASE     0x04800000
+#define     LINSYSCTRL_LIMIT    0x04FFFFFF
+
+#define     LINSYSMTABLE_BASE   0x05000000
+#define     LINSYSMTABLE_LIMIT  0x05FFFFFF
+
+#define     LINSYSDIRECT_BASE   0x06000000
+#define     LINSYSDIRECT_LIMIT  0x07FFFFFF
+
+#define LINLOCAL_BASE   0x08000000
+#define LINLOCAL_LIMIT  0x7FFFFFFF
+
+#define LINCORE_BASE    0x80000000
+#define LINCORE_LIMIT   0x87FFFFFF
+
+#define LINCORE_CODE_BASE  0x80000000
+#define LINCORE_CODE_LIMIT 0x81FFFFFF
+
+#define LINCORE_DATA_BASE  0x82000000
+#define LINCORE_DATA_LIMIT 0x83FFFFFF
+
+
+/* The core can support locked icache lines in this region */
+#define LINCORE_ICACHE_BASE  0x84000000
+#define LINCORE_ICACHE_LIMIT 0x85FFFFFF
+
+/* The core can support locked dcache lines in this region */
+#define LINCORE_DCACHE_BASE  0x86000000
+#define LINCORE_DCACHE_LIMIT 0x87FFFFFF
+
+#define LINGLOBAL_BASE  0x88000000
+#define LINGLOBAL_LIMIT 0xFFFDFFFF
+
+/*
+ * CHIP Core Register Map
+ * ----------------------
+ */
+#define CORE_HWBASE     0x04800000
+#define PRIV_HWBASE     0x04810000
+#define TRIG_HWBASE     0x04820000
+#define SYSC_HWBASE     0x04830000
+
+/*****************************************************************************
+ *         INTER-THREAD KICK REGISTERS FOR SOFTWARE EVENT GENERATION
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that can be used to supply
+ * kicks to threads that service arbitrary software events.
+ */
+
+#define T0KICK     0x04800800   /* Background kick 0     */
+#define     TXXKICK_MAX 0xFFFF  /* Maximum kicks */
+#define     TnXKICK_STRIDE      0x00001000  /* Thread scale value    */
+#define     TnXKICK_STRIDE_S    12
+#define T0KICKI    0x04800808   /* Interrupt kick 0      */
+#define     TXIKICK_OFFSET  0x00000008  /* Int level offset value */
+#define T1KICK     0x04801800   /* Background kick 1     */
+#define T1KICKI    0x04801808   /* Interrupt kick 1      */
+#define T2KICK     0x04802800   /* Background kick 2     */
+#define T2KICKI    0x04802808   /* Interrupt kick 2      */
+#define T3KICK     0x04803800   /* Background kick 3     */
+#define T3KICKI    0x04803808   /* Interrupt kick 3      */
+
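
The per-thread kick registers above sit at a fixed stride from T0KICK, so the address for an arbitrary thread can be computed rather than spelt out; a sketch, assuming the usual convention that the value written is added to the thread's pending kick count:

	/* Sketch: deliver one background kick to hardware thread 'thread'. */
	static void kick_thread_example(unsigned int thread)
	{
		metag_out32(1, T0KICK + thread * TnXKICK_STRIDE);
	}
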
+/*****************************************************************************
+ *                GLOBAL REGISTER ACCESS RESOURCES
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that allow access to the
+ * internal state of all threads in order to allow global set-up of thread
+ * state and external handling of thread events, errors, or debugging.
+ *
+ * The actual unit and register index values needed to access individual
+ * registers are chip specific - see METAC_TXUXX_VALUES in metac_x_y.h.
+ * However two C array initialisers TXUXX_MASKS and TGUXX_MASKS will always be
+ * defined to allow arbitrary loading, display, and saving of all valid
+ * register states without detailed knowledge of their purpose - TXUXX sets
+ * bits for all valid registers and TGUXX sets bits for the sub-set which are
+ * global.
+ */
+
+#define T0UCTREG0   0x04800000  /* Access to all CT regs */
+#define TnUCTRX_STRIDE      0x00001000  /* Thread scale value    */
+#define TXUCTREGn_STRIDE    0x00000008  /* Register scale value  */
+
+#define TXUXXRXDT  0x0480FFF0   /* Data to/from any threads reg */
+#define TXUXXRXRQ  0x0480FFF8
+#define     TXUXXRXRQ_DREADY_BIT 0x80000000  /* Poll for done */
+#define     TXUXXRXRQ_DSPEXT_BIT 0x00020000  /* Addr DSP Regs */
+#define     TXUXXRXRQ_RDnWR_BIT  0x00010000  /* Set for read  */
+#define     TXUXXRXRQ_TX_BITS    0x00003000  /* Thread number */
+#define     TXUXXRXRQ_TX_S       12
+#define     TXUXXRXRQ_RX_BITS    0x000001F0  /* Register num  */
+#define     TXUXXRXRQ_RX_S       4
+#define         TXUXXRXRQ_DSPRARD0    0      /* DSP RAM A Read Pointer 0 */
+#define         TXUXXRXRQ_DSPRARD1    1      /* DSP RAM A Read Pointer 1 */
+#define         TXUXXRXRQ_DSPRAWR0    2      /* DSP RAM A Write Pointer 0 */
+#define         TXUXXRXRQ_DSPRAWR2    3      /* DSP RAM A Write Pointer 1 */
+#define         TXUXXRXRQ_DSPRBRD0    4      /* DSP RAM B Read Pointer 0 */
+#define         TXUXXRXRQ_DSPRBRD1    5      /* DSP RAM B Read Pointer 1 */
+#define         TXUXXRXRQ_DSPRBWR0    6      /* DSP RAM B Write Pointer 0 */
+#define         TXUXXRXRQ_DSPRBWR1    7      /* DSP RAM B Write Pointer 1 */
+#define         TXUXXRXRQ_DSPRARINC0  8      /* DSP RAM A Read Increment 0 */
+#define         TXUXXRXRQ_DSPRARINC1  9      /* DSP RAM A Read Increment 1 */
+#define         TXUXXRXRQ_DSPRAWINC0 10      /* DSP RAM A Write Increment 0 */
+#define         TXUXXRXRQ_DSPRAWINC1 11      /* DSP RAM A Write Increment 1 */
+#define         TXUXXRXRQ_DSPRBRINC0 12      /* DSP RAM B Read Increment 0 */
+#define         TXUXXRXRQ_DSPRBRINC1 13      /* DSP RAM B Read Increment 1 */
+#define         TXUXXRXRQ_DSPRBWINC0 14      /* DSP RAM B Write Increment 0 */
+#define         TXUXXRXRQ_DSPRBWINC1 15      /* DSP RAM B Write Increment 1 */
+
+#define         TXUXXRXRQ_ACC0L0     16      /* Accumulator 0 bottom 32-bits */
+#define         TXUXXRXRQ_ACC1L0     17      /* Accumulator 1 bottom 32-bits */
+#define         TXUXXRXRQ_ACC2L0     18      /* Accumulator 2 bottom 32-bits */
+#define         TXUXXRXRQ_ACC3L0     19      /* Accumulator 3 bottom 32-bits */
+#define         TXUXXRXRQ_ACC0HI     20      /* Accumulator 0 top 8-bits */
+#define         TXUXXRXRQ_ACC1HI     21      /* Accumulator 1 top 8-bits */
+#define         TXUXXRXRQ_ACC2HI     22      /* Accumulator 2 top 8-bits */
+#define         TXUXXRXRQ_ACC3HI     23      /* Accumulator 3 top 8-bits */
+#define     TXUXXRXRQ_UXX_BITS   0x0000000F  /* Unit number   */
+#define     TXUXXRXRQ_UXX_S      0
+
+/*****************************************************************************
+ *          PRIVILEGE CONTROL VALUES FOR MEMORY MAPPED RESOURCES
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that give control over, and set
+ * the privilege required to access, other memory mapped resources. These
+ * registers themselves always require privilege to update them.
+ */
+
+#define TXPRIVREG_STRIDE    0x8 /* Delta between per-thread regs */
+#define TXPRIVREG_STRIDE_S  3
+
+/*
+ * Each bit 0 to 15 defines privilege required to access internal register
+ * regions 0x04800000 to 0x048FFFFF in 64k chunks
+ */
+#define T0PIOREG    0x04810100
+#define T1PIOREG    0x04810108
+#define T2PIOREG    0x04810110
+#define T3PIOREG    0x04810118
+
+/*
+ * Each bit 0 to 31 defines privilege required to use the pair of
+ * system events implemented as writee in the regions 0x04000000 to
+ * 0x04000FFF in 2*64 byte chunks.
+ */
+#define T0PSYREG    0x04810180
+#define T1PSYREG    0x04810188
+#define T2PSYREG    0x04810190
+#define T3PSYREG    0x04810198
+
+/*
+ * CHIP PRIV CONTROLS
+ * ------------------
+ */
+
+/* The TXPIOREG register holds a bit mask directly mappable to
+   corresponding addresses in the range 0x04800000 to 0x049FFFFF */
+#define     TXPIOREG_ADDR_BITS  0x1F0000 /* Up to 32x64K bytes */
+#define     TXPIOREG_ADDR_S     16
+
+/* Hence based on the _HWBASE values ... */
+#define     TXPIOREG_CORE_BIT       (1<<((0x04800000>>16)&0x1F))
+#define     TXPIOREG_PRIV_BIT       (1<<((0x04810000>>16)&0x1F))
+#define     TXPIOREG_TRIG_BIT       (1<<((0x04820000>>16)&0x1F))
+#define     TXPIOREG_SYSC_BIT       (1<<((0x04830000>>16)&0x1F))
+
+#define     TXPIOREG_WRC_BIT          0x00080000  /* Wr combiner reg priv */
+#define     TXPIOREG_LOCALBUS_RW_BIT  0x00040000  /* Local bus rd/wr priv */
+#define     TXPIOREG_SYSREGBUS_RD_BIT 0x00020000  /* Sys reg bus read priv */
+#define     TXPIOREG_SYSREGBUS_WR_BIT 0x00010000  /* Sys reg bus write priv */
+
+/* CORE region privilege controls */
+#define T0PRIVCORE 0x04800828
+#define         TXPRIVCORE_TXBKICK_BIT   0x001  /* Background kick priv */
+#define         TXPRIVCORE_TXIKICK_BIT   0x002  /* Interrupt kick priv  */
+#define         TXPRIVCORE_TXAMAREGX_BIT 0x004  /* TXAMAREG4|5|6 priv   */
+#define TnPRIVCORE_STRIDE 0x00001000
+
+#define T0PRIVSYSR 0x04810000
+#define     TnPRIVSYSR_STRIDE   0x00000008
+#define     TnPRIVSYSR_STRIDE_S 3
+#define     TXPRIVSYSR_CFLUSH_BIT     0x01
+#define     TXPRIVSYSR_MTABLE_BIT     0x02
+#define     TXPRIVSYSR_DIRECT_BIT     0x04
+#ifdef METAC_1_2
+#define     TXPRIVSYSR_ALL_BITS       0x07
+#else
+#define     TXPRIVSYSR_CORE_BIT       0x08
+#define     TXPRIVSYSR_CORECODE_BIT   0x10
+#define     TXPRIVSYSR_ALL_BITS       0x1F
+#endif
+#define T1PRIVSYSR 0x04810008
+#define T2PRIVSYSR 0x04810010
+#define T3PRIVSYSR 0x04810018
+
+/*****************************************************************************
+ *          H/W TRIGGER STATE/LEVEL REGISTERS AND H/W TRIGGER VECTORS
+ ****************************************************************************/
+/*
+ * These values define memory mapped registers that give control over, and
+ * report the state of, hardware trigger sources both external to the META
+ * processor and internal to it.
+ */
+
+#define HWSTATMETA  0x04820000  /* Hardware status/clear META trig */
+#define         HWSTATMETA_T0HALT_BITS 0xF
+#define         HWSTATMETA_T0HALT_S    0
+#define     HWSTATMETA_T0BHALT_BIT 0x1  /* Background HALT */
+#define     HWSTATMETA_T0IHALT_BIT 0x2  /* Interrupt HALT  */
+#define     HWSTATMETA_T0PHALT_BIT 0x4  /* PF/RO Memory HALT */
+#define     HWSTATMETA_T0AMATR_BIT 0x8  /* AMA trigger */
+#define     HWSTATMETA_TnINT_S     4    /* Shift by (thread*4) */
+#define HWSTATEXT   0x04820010  /* H/W status/clear external trigs  0-31 */
+#define HWSTATEXT2  0x04820018  /* H/W status/clear external trigs 32-63 */
+#define HWSTATEXT4  0x04820020  /* H/W status/clear external trigs 64-95 */
+#define HWSTATEXT6  0x04820028  /* H/W status/clear external trigs 96-127 */
+#define HWLEVELEXT  0x04820030  /* Edge/Level type of external trigs  0-31 */
+#define HWLEVELEXT2 0x04820038  /* Edge/Level type of external trigs 32-63 */
+#define HWLEVELEXT4 0x04820040  /* Edge/Level type of external trigs 64-95 */
+#define HWLEVELEXT6 0x04820048  /* Edge/Level type of external trigs 96-127 */
+#define     HWLEVELEXT_XXX_LEVEL 1  /* Level sense logic in HWSTATEXTn */
+#define     HWLEVELEXT_XXX_EDGE  0
+#define HWMASKEXT   0x04820050  /* Enable/disable of external trigs  0-31 */
+#define HWMASKEXT2  0x04820058  /* Enable/disable of external trigs 32-63 */
+#define HWMASKEXT4  0x04820060  /* Enable/disable of external trigs 64-95 */
+#define HWMASKEXT6  0x04820068  /* Enable/disable of external trigs 96-127 */
+#define T0VECINT_BHALT  0x04820500  /* Background HALT trigger vector */
+#define     TXVECXXX_BITS   0xF       /* Per-trigger vector vals 0,1,4-15 */
+#define     TXVECXXX_S  0
+#define T0VECINT_IHALT  0x04820508  /* Interrupt HALT */
+#define T0VECINT_PHALT  0x04820510  /* PF/RO memory fault */
+#define T0VECINT_AMATR  0x04820518  /* AMA trigger */
+#define     TnVECINT_STRIDE 0x00000020  /* Per thread stride */
+#define HWVEC0EXT   0x04820700  /* Vectors for external triggers  0-31 */
+#define HWVEC20EXT  0x04821700  /* Vectors for external triggers 32-63 */
+#define HWVEC40EXT  0x04822700  /* Vectors for external triggers 64-95 */
+#define HWVEC60EXT  0x04823700  /* Vectors for external triggers 96-127 */
+#define     HWVECnEXT_STRIDE 0x00000008 /* Per trigger stride */
+#define HWVECnEXT_DEBUG 0x1         /* Redirect trigger to debug i/f */
+
+/*
+ * CORE HWCODE-BREAKPOINT REGISTERS/VALUES
+ * ---------------------------------------
+ */
+#define CODEB0ADDR         0x0480FF00  /* Address specifier */
+#define     CODEBXADDR_MATCHX_BITS 0xFFFFFFFC
+#define     CODEBXADDR_MATCHX_S    2
+#define CODEB0CTRL         0x0480FF08  /* Control */
+#define     CODEBXCTRL_MATEN_BIT   0x80000000   /* Match 'Enable'  */
+#define     CODEBXCTRL_MATTXEN_BIT 0x10000000   /* Match threadn enable */
+#define     CODEBXCTRL_HITC_BITS   0x00FF0000   /* Hit counter   */
+#define     CODEBXCTRL_HITC_S      16
+#define           CODEBXHITC_NEXT  0xFF     /* Next 'hit' will trigger */
+#define           CODEBXHITC_HIT1  0x00     /* No 'hits' after trigger */
+#define     CODEBXCTRL_MMASK_BITS  0x0000FFFC   /* Mask ADDR_MATCH bits */
+#define     CODEBXCTRL_MMASK_S     2
+#define     CODEBXCTRL_MATLTX_BITS 0x00000003   /* Match threadn LOCAL addr */
+#define     CODEBXCTRL_MATLTX_S    0            /* Match threadn LOCAL addr */
+#define CODEBnXXXX_STRIDE      0x00000010  /* Stride between CODEB reg sets */
+#define CODEBnXXXX_STRIDE_S    4
+#define CODEBnXXXX_LIMIT       3           /* Sets 0-3 */
+
+/*
+ * CORE DATA-WATCHPOINT REGISTERS/VALUES
+ * -------------------------------------
+ */
+#define DATAW0ADDR         0x0480FF40  /* Address specifier */
+#define     DATAWXADDR_MATCHR_BITS 0xFFFFFFF8
+#define     DATAWXADDR_MATCHR_S    3
+#define     DATAWXADDR_MATCHW_BITS 0xFFFFFFFF
+#define     DATAWXADDR_MATCHW_S    0
+#define DATAW0CTRL         0x0480FF48  /* Control */
+#define     DATAWXCTRL_MATRD_BIT   0x80000000   /* Match 'Read'  */
+#ifndef METAC_1_2
+#define     DATAWXCTRL_MATNOTTX_BIT 0x20000000  /* Invert threadn enable */
+#endif
+#define     DATAWXCTRL_MATWR_BIT   0x40000000   /* Match 'Write' */
+#define     DATAWXCTRL_MATTXEN_BIT 0x10000000   /* Match threadn enable */
+#define     DATAWXCTRL_WRSIZE_BITS 0x0F000000   /* Write Match Size */
+#define     DATAWXCTRL_WRSIZE_S    24
+#define         DATAWWRSIZE_ANY   0         /* Any size transaction matches */
+#define         DATAWWRSIZE_8BIT  1     /* Specific sizes ... */
+#define         DATAWWRSIZE_16BIT 2
+#define         DATAWWRSIZE_32BIT 3
+#define         DATAWWRSIZE_64BIT 4
+#define     DATAWXCTRL_HITC_BITS   0x00FF0000   /* Hit counter   */
+#define     DATAWXCTRL_HITC_S      16
+#define           DATAWXHITC_NEXT  0xFF     /* Next 'hit' will trigger */
+#define           DATAWXHITC_HIT1  0x00     /* No 'hits' after trigger */
+#define     DATAWXCTRL_MMASK_BITS 0x0000FFF8    /* Mask ADDR_MATCH bits */
+#define     DATAWXCTRL_MMASK_S    3
+#define     DATAWXCTRL_MATLTX_BITS 0x00000003   /* Match threadn LOCAL addr */
+#define     DATAWXCTRL_MATLTX_S    0            /* Match threadn LOCAL addr */
+#define DATAW0DMATCH0       0x0480FF50 /* Write match data */
+#define DATAW0DMATCH1       0x0480FF58
+#define DATAW0DMASK0        0x0480FF60 /* Write match data mask */
+#define DATAW0DMASK1        0x0480FF68
+#define DATAWnXXXX_STRIDE      0x00000040  /* Stride between DATAW reg sets */
+#define DATAWnXXXX_STRIDE_S    6
+#define DATAWnXXXX_LIMIT       1           /* Sets 0,1 */
+
+/*
+ * CHIP Automatic Mips Allocation control registers
+ * ------------------------------------------------
+ */
+
+/* CORE memory mapped AMA registers */
+#define T0AMAREG4   0x04800810
+#define     TXAMAREG4_POOLSIZE_BITS 0x3FFFFF00
+#define     TXAMAREG4_POOLSIZE_S    8
+#define     TXAMAREG4_AVALUE_BITS   0x000000FF
+#define     TXAMAREG4_AVALUE_S  0
+#define T0AMAREG5   0x04800818
+#define     TXAMAREG5_POOLC_BITS    0x07FFFFFF
+#define         TXAMAREG5_POOLC_S       0
+#define T0AMAREG6   0x04800820
+#define     TXAMAREG6_DLINEDEF_BITS 0x00FFFFF0
+#define         TXAMAREG6_DLINEDEF_S    0
+#define TnAMAREGX_STRIDE    0x00001000
+
+/*
+ * Memory Management Control Unit Table Entries
+ * --------------------------------------------
+ */
+#define MMCU_ENTRY_S         4            /* -> Entry size                */
+#define MMCU_ENTRY_ADDR_BITS 0xFFFFF000   /* Physical address             */
+#define MMCU_ENTRY_ADDR_S    12           /* -> Page size                 */
+#define MMCU_ENTRY_CWIN_BITS 0x000000C0   /* Caching 'window' selection   */
+#define MMCU_ENTRY_CWIN_S    6
+#define     MMCU_CWIN_UNCACHED  0 /* May not be memory etc.  */
+#define     MMCU_CWIN_BURST     1 /* Cached but LRU unset */
+#define     MMCU_CWIN_C1SET     2 /* Cached in 1 set only */
+#define     MMCU_CWIN_CACHED    3 /* Fully cached            */
+#define MMCU_ENTRY_CACHE_BIT 0x00000080   /* Set for cached region         */
+#define     MMCU_ECACHE1_FULL_BIT  0x00000040 /* Use all the sets */
+#define     MMCU_ECACHE0_BURST_BIT 0x00000040 /* Match bursts     */
+#define MMCU_ENTRY_SYS_BIT   0x00000010   /* Sys-coherent access required  */
+#define MMCU_ENTRY_WRC_BIT   0x00000008   /* Write combining allowed       */
+#define MMCU_ENTRY_PRIV_BIT  0x00000004   /* Privilege required            */
+#define MMCU_ENTRY_WR_BIT    0x00000002   /* Writes allowed                */
+#define MMCU_ENTRY_VAL_BIT   0x00000001   /* Entry is valid                */
+
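
Putting the fields above together, a second-level MMCU entry is the page-aligned physical address with the attribute bits OR'd in; a purely illustrative composition of a valid, writable, fully cached entry (not how the pgtable code elsewhere in this patch builds its PTEs):

	/* Sketch: compose an MMCU entry word for one page at physical address 'phys'. */
	static unsigned int make_mmcu_entry_example(unsigned long phys)
	{
		return (phys & MMCU_ENTRY_ADDR_BITS)
			| MMCU_ENTRY_CACHE_BIT
			| (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
			| MMCU_ENTRY_WR_BIT
			| MMCU_ENTRY_VAL_BIT;
	}
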
+#ifdef METAC_2_1
+/*
+ * Extended first-level/top table entries have extra/larger fields in later
+ * cores as bits 11:0 previously had no effect in such table entries.
+ */
+#define MMCU_E1ENT_ADDR_BITS 0xFFFFFFC0   /* Physical address             */
+#define MMCU_E1ENT_ADDR_S    6            /*   -> resolution < page size  */
+#define MMCU_E1ENT_PGSZ_BITS 0x0000001E   /* Page size for 2nd level      */
+#define MMCU_E1ENT_PGSZ_S    1
+#define     MMCU_E1ENT_PGSZ0_POWER   12   /* PgSz  0 -> 4K */
+#define     MMCU_E1ENT_PGSZ_MAX      10   /* PgSz 10 -> 4M maximum */
+#define MMCU_E1ENT_MINIM_BIT 0x00000020
+#endif /* METAC_2_1 */
+
+/* MMCU control register in SYSC region */
+#define MMCU_TABLE_PHYS_ADDR        0x04830010
+#define     MMCU_TABLE_PHYS_ADDR_BITS   0xFFFFFFFC
+#ifdef METAC_2_1
+#define     MMCU_TABLE_PHYS_EXTEND      0x00000001     /* See below */
+#endif
+#define MMCU_DCACHE_CTRL_ADDR       0x04830018
+#define     MMCU_xCACHE_CTRL_ENABLE_BIT     0x00000001
+#define     MMCU_xCACHE_CTRL_PARTITION_BIT  0x00000000 /* See xCPART below */
+#define MMCU_ICACHE_CTRL_ADDR       0x04830020
+
+#ifdef METAC_2_1
+
+/*
+ * Allow direct access to physical memory used to implement MMU table.
+ *
+ * Each is based on a corresponding MMCU_TnLOCAL_TABLE_PHYSn or similar
+ *    MMCU_TnGLOBAL_TABLE_PHYSn register pair (see next).
+ */
+#define LINSYSMEMT0L_BASE   0x05000000
+#define LINSYSMEMT0L_LIMIT  0x051FFFFF
+#define     LINSYSMEMTnX_STRIDE     0x00200000  /*  2MB Local per thread */
+#define     LINSYSMEMTnX_STRIDE_S   21
+#define     LINSYSMEMTXG_OFFSET     0x00800000  /* +2MB Global per thread */
+#define     LINSYSMEMTXG_OFFSET_S   23
+#define LINSYSMEMT1L_BASE   0x05200000
+#define LINSYSMEMT1L_LIMIT  0x053FFFFF
+#define LINSYSMEMT2L_BASE   0x05400000
+#define LINSYSMEMT2L_LIMIT  0x055FFFFF
+#define LINSYSMEMT3L_BASE   0x05600000
+#define LINSYSMEMT3L_LIMIT  0x057FFFFF
+#define LINSYSMEMT0G_BASE   0x05800000
+#define LINSYSMEMT0G_LIMIT  0x059FFFFF
+#define LINSYSMEMT1G_BASE   0x05A00000
+#define LINSYSMEMT1G_LIMIT  0x05BFFFFF
+#define LINSYSMEMT2G_BASE   0x05C00000
+#define LINSYSMEMT2G_LIMIT  0x05DFFFFF
+#define LINSYSMEMT3G_BASE   0x05E00000
+#define LINSYSMEMT3G_LIMIT  0x05FFFFFF
+
+/*
+ * Extended MMU table functionality allows a sparse or flat table to be
+ * described much more efficiently than before.
+ */
+#define MMCU_T0LOCAL_TABLE_PHYS0    0x04830700
+#define   MMCU_TnX_TABLE_PHYSX_STRIDE    0x20   /* Offset per thread */
+#define   MMCU_TnX_TABLE_PHYSX_STRIDE_S  5
+#define   MMCU_TXG_TABLE_PHYSX_OFFSET    0x10   /* Global versus local */
+#define   MMCU_TXG_TABLE_PHYSX_OFFSET_S  4
+#define     MMCU_TBLPHYS0_DCCTRL_BITS       0x000000DF  /* DC controls  */
+#define     MMCU_TBLPHYS0_ENTLB_BIT         0x00000020  /* Cache in TLB */
+#define     MMCU_TBLPHYS0_TBLSZ_BITS        0x00000F00  /* Area supported */
+#define     MMCU_TBLPHYS0_TBLSZ_S           8
+#define         MMCU_TBLPHYS0_TBLSZ0_POWER      22  /* 0 -> 4M */
+#define         MMCU_TBLPHYS0_TBLSZ_MAX         9   /* 9 -> 2G */
+#define     MMCU_TBLPHYS0_LINBASE_BITS      0xFFC00000  /* Linear base */
+#define     MMCU_TBLPHYS0_LINBASE_S         22
+
+#define MMCU_T0LOCAL_TABLE_PHYS1    0x04830708
+#define     MMCU_TBLPHYS1_ADDR_BITS         0xFFFFFFFC  /* Physical base */
+#define     MMCU_TBLPHYS1_ADDR_S            2
+
+#define MMCU_T0GLOBAL_TABLE_PHYS0   0x04830710
+#define MMCU_T0GLOBAL_TABLE_PHYS1   0x04830718
+#define MMCU_T1LOCAL_TABLE_PHYS0    0x04830720
+#define MMCU_T1LOCAL_TABLE_PHYS1    0x04830728
+#define MMCU_T1GLOBAL_TABLE_PHYS0   0x04830730
+#define MMCU_T1GLOBAL_TABLE_PHYS1   0x04830738
+#define MMCU_T2LOCAL_TABLE_PHYS0    0x04830740
+#define MMCU_T2LOCAL_TABLE_PHYS1    0x04830748
+#define MMCU_T2GLOBAL_TABLE_PHYS0   0x04830750
+#define MMCU_T2GLOBAL_TABLE_PHYS1   0x04830758
+#define MMCU_T3LOCAL_TABLE_PHYS0    0x04830760
+#define MMCU_T3LOCAL_TABLE_PHYS1    0x04830768
+#define MMCU_T3GLOBAL_TABLE_PHYS0   0x04830770
+#define MMCU_T3GLOBAL_TABLE_PHYS1   0x04830778
+
+#define MMCU_T0EBWCCTRL             0x04830640
+#define     MMCU_TnEBWCCTRL_BITS    0x00000007
+#define     MMCU_TnEBWCCTRL_S       0
+#define         MMCU_TnEBWCCCTRL_DISABLE_ALL 0
+#define         MMCU_TnEBWCCCTRL_ABIT25      1
+#define         MMCU_TnEBWCCCTRL_ABIT26      2
+#define         MMCU_TnEBWCCCTRL_ABIT27      3
+#define         MMCU_TnEBWCCCTRL_ABIT28      4
+#define         MMCU_TnEBWCCCTRL_ABIT29      5
+#define         MMCU_TnEBWCCCTRL_ABIT30      6
+#define         MMCU_TnEBWCCCTRL_ENABLE_ALL  7
+#define MMCU_TnEBWCCTRL_STRIDE      8
+
+#endif /* METAC_2_1 */
+
+
+/* Registers within the SYSC register region */
+#define METAC_ID                0x04830000
+#define     METAC_ID_MAJOR_BITS     0xFF000000
+#define     METAC_ID_MAJOR_S        24
+#define     METAC_ID_MINOR_BITS     0x00FF0000
+#define     METAC_ID_MINOR_S        16
+#define     METAC_ID_REV_BITS       0x0000FF00
+#define     METAC_ID_REV_S          8
+#define     METAC_ID_MAINT_BITS     0x000000FF
+#define     METAC_ID_MAINT_S        0
+
+#ifdef METAC_2_1
+/* Use of this section is strongly deprecated */
+#define METAC_ID2               0x04830008
+#define     METAC_ID2_DESIGNER_BITS 0xFFFF0000  /* Modified by customer */
+#define     METAC_ID2_DESIGNER_S    16
+#define     METAC_ID2_MINOR2_BITS   0x00000F00  /* 3rd digit of prod rev */
+#define     METAC_ID2_MINOR2_S      8
+#define     METAC_ID2_CONFIG_BITS   0x000000FF  /* Wrapper configuration */
+#define     METAC_ID2_CONFIG_S      0
+
+/* Primary core identification and configuration information */
+#define METAC_CORE_ID           0x04831000
+#define     METAC_COREID_GROUP_BITS   0xFF000000
+#define     METAC_COREID_GROUP_S      24
+#define         METAC_COREID_GROUP_METAG  0x14
+#define     METAC_COREID_ID_BITS      0x00FF0000
+#define     METAC_COREID_ID_S         16
+#define         METAC_COREID_ID_W32       0x10   /* >= for 32-bit pipeline */
+#define     METAC_COREID_CONFIG_BITS  0x0000FFFF
+#define     METAC_COREID_CONFIG_S     0
+#define       METAC_COREID_CFGCACHE_BITS    0x0007
+#define       METAC_COREID_CFGCACHE_S       0
+#define           METAC_COREID_CFGCACHE_NOM       0
+#define           METAC_COREID_CFGCACHE_TYPE0     1
+#define           METAC_COREID_CFGCACHE_NOMMU     1 /* Alias for TYPE0 */
+#define           METAC_COREID_CFGCACHE_NOCACHE   2
+#define           METAC_COREID_CFGCACHE_PRIVNOMMU 3
+#define       METAC_COREID_CFGDSP_BITS      0x0038
+#define       METAC_COREID_CFGDSP_S         3
+#define           METAC_COREID_CFGDSP_NOM       0
+#define           METAC_COREID_CFGDSP_MIN       1
+#define       METAC_COREID_NOFPACC_BIT      0x0040 /* Set if no FPU accum */
+#define       METAC_COREID_CFGFPU_BITS      0x0180
+#define       METAC_COREID_CFGFPU_S         7
+#define           METAC_COREID_CFGFPU_NOM       0
+#define           METAC_COREID_CFGFPU_SNGL      1
+#define           METAC_COREID_CFGFPU_DBL       2
+#define       METAC_COREID_NOAMA_BIT        0x0200 /* Set if no AMA present */
+#define       METAC_COREID_NOCOH_BIT        0x0400 /* Set if no Gbl coherency */
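A hedged sketch of decoding the configuration field; the direct volatile read of the memory-mapped METAC_CORE_ID address and the helper name are illustrative assumptions:

/* Extract the cache/DSP/FPU configuration sub-fields from METAC_CORE_ID. */
static void metag_core_config(unsigned int *cache, unsigned int *dsp, unsigned int *fpu)
{
        unsigned int id  = *(volatile unsigned int *)METAC_CORE_ID;
        unsigned int cfg = (id & METAC_COREID_CONFIG_BITS) >> METAC_COREID_CONFIG_S;

        *cache = (cfg & METAC_COREID_CFGCACHE_BITS) >> METAC_COREID_CFGCACHE_S;
        *dsp   = (cfg & METAC_COREID_CFGDSP_BITS)   >> METAC_COREID_CFGDSP_S;
        *fpu   = (cfg & METAC_COREID_CFGFPU_BITS)   >> METAC_COREID_CFGFPU_S;
        /* e.g. *fpu == METAC_COREID_CFGFPU_DBL indicates a double-precision FPU */
}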
+
+/* Core revision information */
+#define METAC_CORE_REV          0x04831008
+#define     METAC_COREREV_DESIGN_BITS   0xFF000000
+#define     METAC_COREREV_DESIGN_S      24
+#define     METAC_COREREV_MAJOR_BITS    0x00FF0000
+#define     METAC_COREREV_MAJOR_S       16
+#define     METAC_COREREV_MINOR_BITS    0x0000FF00
+#define     METAC_COREREV_MINOR_S       8
+#define     METAC_COREREV_MAINT_BITS    0x000000FF
+#define     METAC_COREREV_MAINT_S       0
+
+/* Configuration information control outside the core */
+#define METAC_CORE_DESIGNER1    0x04831010      /* Arbitrary value */
+#define METAC_CORE_DESIGNER2    0x04831018      /* Arbitrary value */
+
+/* Configuration information covering presence/number of various features */
+#define METAC_CORE_CONFIG2      0x04831020
+#define     METAC_CORECFG2_COREDBGTYPE_BITS 0x60000000   /* Core debug type */
+#define     METAC_CORECFG2_COREDBGTYPE_S    29
+#define     METAC_CORECFG2_DCSMALL_BIT      0x04000000   /* Data cache small */
+#define     METAC_CORECFG2_ICSMALL_BIT      0x02000000   /* Inst cache small */
+#define     METAC_CORECFG2_DCSZNP_BITS      0x01C00000   /* Data cache size np */
+#define     METAC_CORECFG2_DCSZNP_S         22
+#define     METAC_CORECFG2_ICSZNP_BITS      0x00380000  /* Inst cache size np */
+#define     METAC_CORECFG2_ICSZNP_S         19
+#define     METAC_CORECFG2_DCSZ_BITS        0x00070000   /* Data cache size */
+#define     METAC_CORECFG2_DCSZ_S           16
+#define         METAC_CORECFG2_xCSZ_4K          0        /* Allocated values */
+#define         METAC_CORECFG2_xCSZ_8K          1
+#define         METAC_CORECFG2_xCSZ_16K         2
+#define         METAC_CORECFG2_xCSZ_32K         3
+#define         METAC_CORECFG2_xCSZ_64K         4
+#define     METAC_CORE_C2ICSZ_BITS          0x0000E000   /* Inst cache size */
+#define     METAC_CORE_C2ICSZ_S             13
+#define     METAC_CORE_GBLACC_BITS          0x00001800   /* Number of Global Acc */
+#define     METAC_CORE_GBLACC_S             11
+#define     METAC_CORE_GBLDXR_BITS          0x00000700   /* 0 -> 0, R -> 2^(R-1) */
+#define     METAC_CORE_GBLDXR_S             8
+#define     METAC_CORE_GBLAXR_BITS          0x000000E0   /* 0 -> 0, R -> 2^(R-1) */
+#define     METAC_CORE_GBLAXR_S             5
+#define     METAC_CORE_RTTRACE_BIT          0x00000010
+#define     METAC_CORE_WATCHN_BITS          0x0000000C   /* 0 -> 0, N -> 2^N */
+#define     METAC_CORE_WATCHN_S             2
+#define     METAC_CORE_BREAKN_BITS          0x00000003   /* 0 -> 0, N -> 2^N */
+#define     METAC_CORE_BREAKN_S             0
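Going by the allocated METAC_CORECFG2_xCSZ_* values above (0 -> 4K up to 4 -> 64K), the cache size can be recovered as 4K << n; a short sketch, with the helper name being an assumption:

/* Data cache size in KB from a previously read METAC_CORE_CONFIG2 value. */
static unsigned int metag_dcache_size_kb(unsigned int corecfg2)
{
        unsigned int n = (corecfg2 & METAC_CORECFG2_DCSZ_BITS) >> METAC_CORECFG2_DCSZ_S;

        return 4 << n;          /* 0 -> 4K, 1 -> 8K, ... 4 -> 64K */
}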
+
+/* Configuration information covering presence/number of various features */
+#define METAC_CORE_CONFIG3      0x04831028
+#define     METAC_CORECFG3_L2C_REV_ID_BITS          0x000F0000   /* Revision of L2 cache */
+#define     METAC_CORECFG3_L2C_REV_ID_S             16
+#define     METAC_CORECFG3_L2C_LINE_SIZE_BITS       0x00003000   /* L2 line size */
+#define     METAC_CORECFG3_L2C_LINE_SIZE_S          12
+#define         METAC_CORECFG3_L2C_LINE_SIZE_64B    0x0          /* 64 bytes */
+#define     METAC_CORECFG3_L2C_NUM_WAYS_BITS        0x00000F00   /* L2 number of ways (2^n) */
+#define     METAC_CORECFG3_L2C_NUM_WAYS_S           8
+#define     METAC_CORECFG3_L2C_SIZE_BITS            0x000000F0   /* L2 size (2^n) */
+#define     METAC_CORECFG3_L2C_SIZE_S               4
+#define     METAC_CORECFG3_L2C_UNIFIED_BIT          0x00000004   /* Unified cache: */
+#define     METAC_CORECFG3_L2C_UNIFIED_S            2
+#define       METAC_CORECFG3_L2C_UNIFIED_UNIFIED    1            /* - Unified D/I cache */
+#define       METAC_CORECFG3_L2C_UNIFIED_SEPARATE   0            /* - Separate D/I cache */
+#define     METAC_CORECFG3_L2C_MODE_BIT             0x00000002   /* Cache Mode: */
+#define     METAC_CORECFG3_L2C_MODE_S               1
+#define       METAC_CORECFG3_L2C_MODE_WRITE_BACK    1            /* - Write back */
+#define       METAC_CORECFG3_L2C_MODE_WRITE_THROUGH 0            /* - Write through */
+#define     METAC_CORECFG3_L2C_HAVE_L2C_BIT         0x00000001   /* Have L2C */
+#define     METAC_CORECFG3_L2C_HAVE_L2C_S           0
+
+#endif /* METAC_2_1 */
+
+#define SYSC_CACHE_MMU_CONFIG       0x04830028
+#ifdef METAC_2_1
+#define     SYSC_CMMUCFG_DCSKEWABLE_BIT 0x00000040
+#define     SYSC_CMMUCFG_ICSKEWABLE_BIT 0x00000020
+#define     SYSC_CMMUCFG_DCSKEWOFF_BIT  0x00000010  /* Skew association override  */
+#define     SYSC_CMMUCFG_ICSKEWOFF_BIT  0x00000008  /* -> defaults to 0 (on) if present */
+#define     SYSC_CMMUCFG_MODE_BITS      0x00000007  /* Access to old state */
+#define     SYSC_CMMUCFG_MODE_S         0
+#define         SYSC_CMMUCFG_ON             0x7
+#define         SYSC_CMMUCFG_EBYPASS        0x6   /* Enhanced by-pass mode */
+#define         SYSC_CMMUCFG_EBYPASSIC      0x4   /* EB just inst cache */
+#define         SYSC_CMMUCFG_EBYPASSDC      0x2   /* EB just data cache */
+#endif /* METAC_2_1 */
+/* Old definitions, Keep them for now */
+#define         SYSC_CMMUCFG_MMU_ON_BIT     0x1
+#define         SYSC_CMMUCFG_DC_ON_BIT      0x2
+#define         SYSC_CMMUCFG_IC_ON_BIT      0x4
+
+#define SYSC_JTAG_THREAD            0x04830030
+#define     SYSC_JTAG_TX_BITS           0x00000003 /* Read only bits! */
+#define     SYSC_JTAG_TX_S              0
+#define     SYSC_JTAG_PRIV_BIT          0x00000004
+#ifdef METAC_2_1
+#define     SYSC_JTAG_SLAVETX_BITS      0x00000018
+#define     SYSC_JTAG_SLAVETX_S         3
+#endif /* METAC_2_1 */
+
+#define SYSC_DCACHE_FLUSH           0x04830038
+#define SYSC_ICACHE_FLUSH           0x04830040
+#define  SYSC_xCACHE_FLUSH_INIT     0x1
+#define MMCU_DIRECTMAP0_ADDR        0x04830080 /* LINSYSDIRECT_BASE -> */
+#define     MMCU_DIRECTMAPn_STRIDE      0x00000010 /* 4 Region settings */
+#define     MMCU_DIRECTMAPn_S           4
+#define         MMCU_DIRECTMAPn_ADDR_BITS       0xFF800000
+#define         MMCU_DIRECTMAPn_ADDR_S          23
+#define         MMCU_DIRECTMAPn_ADDR_SCALE      0x00800000 /* 8M Regions */
+#ifdef METAC_2_1
+/*
+ * These fields in the above registers provide MMCU_ENTRY_* values
+ *   for each direct mapped region to enable optimisation of these areas.
+ *       (LSB similar to VALID must be set for enhancements to be active)
+ */
+#define         MMCU_DIRECTMAPn_ENHANCE_BIT     0x00000001 /* 0 = no optim */
+#define         MMCU_DIRECTMAPn_DCCTRL_BITS     0x000000DF /* Get DC Ctrl */
+#define         MMCU_DIRECTMAPn_DCCTRL_S        0
+#define         MMCU_DIRECTMAPn_ICCTRL_BITS     0x0000C000 /* Get IC Ctrl */
+#define         MMCU_DIRECTMAPn_ICCTRL_S        8
+#define         MMCU_DIRECTMAPn_ENTLB_BIT       0x00000020 /* Cache in TLB */
+#define         MMCU_DIRECTMAPn_ICCWIN_BITS     0x0000C000 /* Get IC Win Bits */
+#define         MMCU_DIRECTMAPn_ICCWIN_S        14
+#endif /* METAC_2_1 */
+
+#define MMCU_DIRECTMAP1_ADDR        0x04830090
+#define MMCU_DIRECTMAP2_ADDR        0x048300a0
+#define MMCU_DIRECTMAP3_ADDR        0x048300b0
+
+/*
+ * These bits partition each thread's use of data cache or instruction cache
+ * resource by modifying the top 4 bits of the address within the cache
+ * storage area.
+ */
+#define SYSC_DCPART0 0x04830200
+#define     SYSC_xCPARTn_STRIDE   0x00000008
+#define     SYSC_xCPARTL_AND_BITS 0x0000000F /* Masks top 4 bits */
+#define     SYSC_xCPARTL_AND_S    0
+#define     SYSC_xCPARTG_AND_BITS 0x00000F00 /* Masks top 4 bits */
+#define     SYSC_xCPARTG_AND_S    8
+#define     SYSC_xCPARTL_OR_BITS  0x000F0000 /* Ors into top 4 bits */
+#define     SYSC_xCPARTL_OR_S     16
+#define     SYSC_xCPARTG_OR_BITS  0x0F000000 /* Ors into top 4 bits */
+#define     SYSC_xCPARTG_OR_S     24
+#define     SYSC_CWRMODE_BIT      0x80000000 /* Write cache mode bit */
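As a rough, illustrative sketch of combining the AND/OR fields (the particular half-cache split chosen here is an assumption, not a recommended policy):

/* Value for SYSC_DCPART0 restricting thread 0 to the lower half of the
 * data cache: clear the top address bit via the AND mask, OR nothing in. */
static unsigned int metag_dcpart_lower_half(void)
{
        unsigned int val = 0;

        val |= (0x7 << SYSC_xCPARTL_AND_S) & SYSC_xCPARTL_AND_BITS; /* keep 3 of 4 top bits */
        val |= (0x0 << SYSC_xCPARTL_OR_S)  & SYSC_xCPARTL_OR_BITS;  /* force nothing back in */

        return val;
}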
+
+#define SYSC_DCPART1 0x04830208
+#define SYSC_DCPART2 0x04830210
+#define SYSC_DCPART3 0x04830218
+#define SYSC_ICPART0 0x04830220
+#define SYSC_ICPART1 0x04830228
+#define SYSC_ICPART2 0x04830230
+#define SYSC_ICPART3 0x04830238
+
+/*
+ * META Core Memory and Cache Update registers
+ */
+#define SYSC_MCMDATAX  0x04830300   /* 32-bit read/write data register */
+#define SYSC_MCMDATAT  0x04830308   /* Read or write data triggers oper */
+#define SYSC_MCMGCTRL  0x04830310   /* Control register */
+#define     SYSC_MCMGCTRL_READ_BIT  0x00000001 /* Set to issue 1st read */
+#define     SYSC_MCMGCTRL_AINC_BIT  0x00000002 /* Set for auto-increment */
+#define     SYSC_MCMGCTRL_ADDR_BITS 0x000FFFFC /* Address or index */
+#define     SYSC_MCMGCTRL_ADDR_S    2
+#define     SYSC_MCMGCTRL_ID_BITS   0x0FF00000 /* Internal memory block Id */
+#define     SYSC_MCMGCTRL_ID_S      20
+#define         SYSC_MCMGID_NODEV       0xFF /* No Device Selected */
+#define         SYSC_MCMGID_DSPRAM0A    0x04 /* DSP RAM D0 block A access */
+#define         SYSC_MCMGID_DSPRAM0B    0x05 /* DSP RAM D0 block B access */
+#define         SYSC_MCMGID_DSPRAM1A    0x06 /* DSP RAM D1 block A access */
+#define         SYSC_MCMGID_DSPRAM1B    0x07 /* DSP RAM D1 block B access */
+#define         SYSC_MCMGID_DCACHEL     0x08 /* DCACHE lines (64-bytes/line) */
+#ifdef METAC_2_1
+#define         SYSC_MCMGID_DCACHETLB   0x09 /* DCACHE TLB ( Read Only )     */
+#endif /* METAC_2_1 */
+#define         SYSC_MCMGID_DCACHET     0x0A /* DCACHE tags (32-bits/line)   */
+#define         SYSC_MCMGID_DCACHELRU   0x0B /* DCACHE LRU (8-bits/line)     */
+#define         SYSC_MCMGID_ICACHEL     0x0C /* ICACHE lines (64-bytes/line) */
+#ifdef METAC_2_1
+#define         SYSC_MCMGID_ICACHETLB   0x0D /* ICACHE TLB (Read Only )     */
+#endif /* METAC_2_1 */
+#define         SYSC_MCMGID_ICACHET     0x0E /* ICACHE Tags (32-bits/line)   */
+#define         SYSC_MCMGID_ICACHELRU   0x0F /* ICACHE LRU (8-bits/line )    */
+#define         SYSC_MCMGID_COREIRAM0   0x10 /* Core code mem id 0 */
+#define         SYSC_MCMGID_COREIRAMn   0x17
+#define         SYSC_MCMGID_COREDRAM0   0x18 /* Core data mem id 0 */
+#define         SYSC_MCMGID_COREDRAMn   0x1F
+#ifdef METAC_2_1
+#define         SYSC_MCMGID_DCACHEST    0x20 /* DCACHE ST ( Read Only )      */
+#define         SYSC_MCMGID_ICACHEST    0x21 /* ICACHE ST ( Read Only )      */
+#define         SYSC_MCMGID_DCACHETLBLRU 0x22 /* DCACHE TLB LRU ( Read Only )*/
+#define         SYSC_MCMGID_ICACHETLBLRU 0x23 /* ICACHE TLB LRU( Read Only ) */
+#define         SYSC_MCMGID_DCACHESTLRU 0x24 /* DCACHE ST LRU ( Read Only )  */
+#define         SYSC_MCMGID_ICACHESTLRU 0x25 /* ICACHE ST LRU ( Read Only )  */
+#define         SYSC_MCMGID_DEBUGTLB    0x26 /* DEBUG TLB ( Read Only )      */
+#define         SYSC_MCMGID_DEBUGST     0x27 /* DEBUG ST ( Read Only )       */
+#define         SYSC_MCMGID_L2CACHEL    0x30 /* L2 Cache Lines (64-bytes/line) */
+#define         SYSC_MCMGID_L2CACHET    0x31 /* L2 Cache Tags (32-bits/line) */
+#define         SYSC_MCMGID_COPROX0     0x70 /* Coprocessor port id 0 */
+#define         SYSC_MCMGID_COPROXn     0x77
+#endif /* METAC_2_1 */
+#define     SYSC_MCMGCTRL_TR31_BIT  0x80000000 /* Trigger 31 on completion */
+#define SYSC_MCMSTATUS 0x04830318   /* Status read only */
+#define     SYSC_MCMSTATUS_IDLE_BIT 0x00000001
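A rough sketch of one plausible read sequence through this interface; the exact handshake (ordering, idle polling) is an assumption here and the TRM remains the authority:

/* Read one DCACHE tag word via the core memory/cache update registers. */
static unsigned int metag_mcm_read_dcache_tag(unsigned int index)
{
        volatile unsigned int *gctrl  = (volatile unsigned int *)SYSC_MCMGCTRL;
        volatile unsigned int *status = (volatile unsigned int *)SYSC_MCMSTATUS;
        volatile unsigned int *datax  = (volatile unsigned int *)SYSC_MCMDATAX;
        unsigned int ctrl;

        ctrl  = SYSC_MCMGID_DCACHET << SYSC_MCMGCTRL_ID_S;               /* select DCACHE tags */
        ctrl |= (index << SYSC_MCMGCTRL_ADDR_S) & SYSC_MCMGCTRL_ADDR_BITS;
        ctrl |= SYSC_MCMGCTRL_READ_BIT;                                  /* issue first read */
        *gctrl = ctrl;

        while (!(*status & SYSC_MCMSTATUS_IDLE_BIT))                     /* wait for idle */
                ;

        return *datax;
}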
+
+/* META System Events */
+#define SYSC_SYS_EVENT            0x04830400
+#define     SYSC_SYSEVT_ATOMIC_BIT      0x00000001
+#define     SYSC_SYSEVT_CACHEX_BIT      0x00000002
+#define SYSC_ATOMIC_LOCK          0x04830408
+#define     SYSC_ATOMIC_STATE_TX_BITS 0x0000000F
+#define     SYSC_ATOMIC_STATE_TX_S    0
+#ifdef METAC_1_2
+#define     SYSC_ATOMIC_STATE_DX_BITS 0x000000F0
+#define     SYSC_ATOMIC_STATE_DX_S    4
+#else /* METAC_1_2 */
+#define     SYSC_ATOMIC_SOURCE_BIT    0x00000010
+#endif /* !METAC_1_2 */
+
+
+#ifdef METAC_2_1
+
+/* These definitions replace the EXPAND_TIMER_DIV register defines which are to
+ * be deprecated.
+ */
+#define SYSC_TIMER_DIV            0x04830140
+#define     SYSC_TIMDIV_BITS      0x000000FF
+#define     SYSC_TIMDIV_S         0
+
+/* META Enhanced by-pass control for local and global region */
+#define MMCU_LOCAL_EBCTRL   0x04830600
+#define MMCU_GLOBAL_EBCTRL  0x04830608
+#define     MMCU_EBCTRL_SINGLE_BIT      0x00000020 /* TLB Uncached */
+/*
+ * These fields in the above registers provide MMCU_ENTRY_* values
+ *   for each direct mapped region to enable optimisation of these areas.
+ */
+#define     MMCU_EBCTRL_DCCTRL_BITS     0x000000C0 /* Get DC Ctrl */
+#define     MMCU_EBCTRL_DCCTRL_S        0
+#define     MMCU_EBCTRL_ICCTRL_BITS     0x0000C000 /* Get IC Ctrl */
+#define     MMCU_EBCTRL_ICCTRL_S        8
+
+/* META Cached Core Mode Registers */
+#define MMCU_T0CCM_ICCTRL   0x04830680     /* Core cached code control */
+#define     MMCU_TnCCM_xxCTRL_STRIDE    8
+#define     MMCU_TnCCM_xxCTRL_STRIDE_S  3
+#define MMCU_T1CCM_ICCTRL   0x04830688
+#define MMCU_T2CCM_ICCTRL   0x04830690
+#define MMCU_T3CCM_ICCTRL   0x04830698
+#define MMCU_T0CCM_DCCTRL   0x048306C0     /* Core cached data control */
+#define MMCU_T1CCM_DCCTRL   0x048306C8
+#define MMCU_T2CCM_DCCTRL   0x048306D0
+#define MMCU_T3CCM_DCCTRL   0x048306D8
+#define     MMCU_TnCCM_ENABLE_BIT       0x00000001
+#define     MMCU_TnCCM_WIN3_BIT         0x00000002
+#define     MMCU_TnCCM_DCWRITE_BIT      0x00000004  /* In DCCTRL only */
+#define     MMCU_TnCCM_REGSZ_BITS       0x00000F00
+#define     MMCU_TnCCM_REGSZ_S          8
+#define         MMCU_TnCCM_REGSZ0_POWER      12     /* RegSz 0 -> 4K */
+#define         MMCU_TnCCM_REGSZ_MAXBYTES    0x00080000  /* 512K max */
+#define     MMCU_TnCCM_ADDR_BITS        0xFFFFF000
+#define     MMCU_TnCCM_ADDR_S           12
+
+#endif /* METAC_2_1 */
+
+/*
+ * Hardware performance counter registers
+ * --------------------------------------
+ */
+#ifdef METAC_2_1
+/* Two Performance Counter Internal Core Events Control registers */
+#define PERF_ICORE0   0x0480FFD0
+#define PERF_ICORE1   0x0480FFD8
+#define     PERFI_CTRL_BITS    0x0000000F
+#define     PERFI_CTRL_S       0
+#define         PERFI_CAH_DMISS    0x0  /* Dcache Misses in cache (TLB Hit) */
+#define         PERFI_CAH_IMISS    0x1  /* Icache Misses in cache (TLB Hit) */
+#define         PERFI_TLB_DMISS    0x2  /* Dcache Misses in per-thread TLB */
+#define         PERFI_TLB_IMISS    0x3  /* Icache Misses in per-thread TLB */
+#define         PERFI_TLB_DWRHITS  0x4  /* DC Write-Hits in per-thread TLB */
+#define         PERFI_TLB_DWRMISS  0x5  /* DC Write-Miss in per-thread TLB */
+#define         PERFI_CAH_DLFETCH  0x8  /* DC Read cache line fetch */
+#define         PERFI_CAH_ILFETCH  0x9  /* IC Read cache line fetch */
+#define         PERFI_CAH_DWFETCH  0xA  /* DC Read cache word fetch */
+#define         PERFI_CAH_IWFETCH  0xB  /* IC Read cache word fetch */
+#endif /* METAC_2_1 */
+
+/* Two memory-mapped hardware performance counter registers */
+#define PERF_COUNT0 0x0480FFE0
+#define PERF_COUNT1 0x0480FFE8
+
+/* Fields in PERF_COUNTn registers */
+#define PERF_COUNT_BITS  0x00ffffff /* Event count value */
+
+#define PERF_THREAD_BITS 0x0f000000 /* Thread mask selects threads */
+#define PERF_THREAD_S    24
+
+#define PERF_CTRL_BITS   0xf0000000 /* Event filter control */
+#define PERF_CTRL_S      28
+
+#define    PERFCTRL_SUPER   0  /* Superthread cycles */
+#define    PERFCTRL_REWIND  1  /* Rewinds due to Dcache Misses */
+#ifdef METAC_2_1
+#define    PERFCTRL_SUPREW  2  /* Rewinds of superthreaded cycles (no mask) */
+
+#define    PERFCTRL_CYCLES  3  /* Counts all cycles (no mask) */
+
+#define    PERFCTRL_PREDBC  4  /* Conditional branch predictions */
+#define    PERFCTRL_MISPBC  5  /* Conditional branch mispredictions */
+#define    PERFCTRL_PREDRT  6  /* Return predictions */
+#define    PERFCTRL_MISPRT  7  /* Return mispredictions */
+#endif /* METAC_2_1 */
+
+#define    PERFCTRL_DHITS   8  /* Dcache Hits */
+#define    PERFCTRL_IHITS   9  /* Icache Hits */
+#define    PERFCTRL_IMISS   10 /* Icache Misses in cache or TLB */
+#ifdef METAC_2_1
+#define    PERFCTRL_DCSTALL 11 /* Dcache+TLB o/p delayed (per-thread) */
+#define    PERFCTRL_ICSTALL 12 /* Icache+TLB o/p delayed (per-thread) */
+
+#define    PERFCTRL_INT     13 /* Internal core detailed events (see next) */
+#define    PERFCTRL_EXT     15 /* External source in core periphery */
+#endif /* METAC_2_1 */
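A small sketch combining the count/thread/control fields above; programming PERF_COUNT0 directly with a volatile write is an illustrative assumption:

/* Count data cache hits on thread 0 only, starting from zero. */
static void metag_perf_count_dhits_t0(void)
{
        unsigned int val = 0;

        val |= PERFCTRL_DHITS << PERF_CTRL_S;   /* event select: Dcache hits */
        val |= 1 << PERF_THREAD_S;              /* thread mask: thread 0     */
                                                /* count field left at zero  */
        *(volatile unsigned int *)PERF_COUNT0 = val;
}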
+
+#ifdef METAC_2_1
+/* These definitions replace the EXPAND_PERFCHANx register defines which are to
+ * be deprecated.
+ */
+#define PERF_CHAN0 0x04830150
+#define PERF_CHAN1 0x04830158
+#define     PERF_CHAN_BITS    0x0000000F
+#define     PERF_CHAN_S       0
+#define         PERFCHAN_WRC_WRBURST   0x0   /* Write combiner write burst */
+#define         PERFCHAN_WRC_WRITE     0x1   /* Write combiner write       */
+#define         PERFCHAN_WRC_RDBURST   0x2   /* Write combiner read burst  */
+#define         PERFCHAN_WRC_READ      0x3   /* Write combiner read        */
+#define         PERFCHAN_PREARB_DELAY  0x4   /* Pre-arbiter delay cycle    */
+                                            /* Cross-bar hold-off cycle:  */
+#define         PERFCHAN_XBAR_HOLDWRAP 0x5   /*    wrapper register        */
+#define         PERFCHAN_XBAR_HOLDSBUS 0x6   /*    system bus (ATP only)   */
+#define         PERFCHAN_XBAR_HOLDCREG 0x9   /*    core registers          */
+#define         PERFCHAN_L2C_MISS      0x6   /* L2 Cache miss              */
+#define         PERFCHAN_L2C_HIT       0x7   /* L2 Cache hit               */
+#define         PERFCHAN_L2C_WRITEBACK 0x8   /* L2 Cache writeback         */
+                                            /* Admission delay cycle:     */
+#define         PERFCHAN_INPUT_CREG    0xB   /*    core registers          */
+#define         PERFCHAN_INPUT_INTR    0xC   /*    internal ram            */
+#define         PERFCHAN_INPUT_WRC     0xD   /*    write combiners(memory) */
+
+/* Should the following be removed, as they are not in the TRM anywhere? */
+#define         PERFCHAN_XBAR_HOLDINTR 0x8   /*    internal ram            */
+#define         PERFCHAN_INPUT_SBUS    0xA   /*    register port           */
+/* End of remove section. */
+
+#define         PERFCHAN_MAINARB_DELAY 0xF   /* Main arbiter delay cycle   */
+
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Write combiner registers
+ * ------------------------
+ *
+ * These replace the EXPAND_T0WRCOMBINE register defines, which will be
+ * deprecated.
+ */
+#define WRCOMB_CONFIG0             0x04830100
+#define     WRCOMB_LFFEn_BIT           0x00004000  /* Enable auto line full flush */
+#define     WRCOMB_ENABLE_BIT          0x00002000  /* Enable write combiner */
+#define     WRCOMB_TIMEOUT_ENABLE_BIT  0x00001000  /* Timeout flush enable */
+#define     WRCOMB_TIMEOUT_COUNT_BITS  0x000003FF
+#define     WRCOMB_TIMEOUT_COUNT_S     0
+#define WRCOMB_CONFIG4             0x04830180
+#define     WRCOMB_PARTALLOC_BITS      0x000000C0
+#define     WRCOMB_PARTALLOC_S         6
+#define     WRCOMB_PARTSIZE_BITS       0x00000030
+#define     WRCOMB_PARTSIZE_S          4
+#define     WRCOMB_PARTOFFSET_BITS     0x0000000F
+#define     WRCOMB_PARTOFFSET_S        0
+#define WRCOMB_CONFIG_STRIDE       8
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Thread arbiter registers
+ * ------------------------
+ *
+ * These replace the EXPAND_T0ARBITER register defines, which will be
+ * deprecated.
+ */
+#define ARBITER_ARBCONFIG0       0x04830120
+#define     ARBCFG_BPRIORITY_BIT     0x02000000
+#define     ARBCFG_IPRIORITY_BIT     0x01000000
+#define     ARBCFG_PAGE_BITS         0x00FF0000
+#define     ARBCFG_PAGE_S            16
+#define     ARBCFG_BBASE_BITS        0x0000FF00
+#define     ARBCFG_BBASE_S           8
+#define     ARBCFG_IBASE_BITS        0x000000FF
+#define     ARBCFG_IBASE_S           0
+#define ARBITER_TTECONFIG0       0x04820160
+#define     ARBTTE_IUPPER_BITS       0xFF000000
+#define     ARBTTE_IUPPER_S          24
+#define     ARBTTE_ILOWER_BITS       0x00FF0000
+#define     ARBTTE_ILOWER_S          16
+#define     ARBTTE_BUPPER_BITS       0x0000FF00
+#define     ARBTTE_BUPPER_S          8
+#define     ARBTTE_BLOWER_BITS       0x000000FF
+#define     ARBTTE_BLOWER_S          0
+#define ARBITER_STRIDE           8
+#endif /* METAC_2_1 */
+
+/*
+ * Expansion area registers
+ * --------------------------------------
+ */
+
+/* These defines are to be deprecated. See above instead. */
+#define EXPAND_T0WRCOMBINE         0x03000000
+#ifdef METAC_2_1
+#define     EXPWRC_LFFEn_BIT           0x00004000  /* Enable auto line full flush */
+#endif /* METAC_2_1 */
+#define     EXPWRC_ENABLE_BIT          0x00002000  /* Enable write combiner */
+#define     EXPWRC_TIMEOUT_ENABLE_BIT  0x00001000  /* Timeout flush enable */
+#define     EXPWRC_TIMEOUT_COUNT_BITS  0x000003FF
+#define     EXPWRC_TIMEOUT_COUNT_S     0
+#define EXPAND_TnWRCOMBINE_STRIDE  0x00000008
+
+/* These defines are to be deprecated. See above instead. */
+#define EXPAND_T0ARBITER         0x03000020
+#define     EXPARB_BPRIORITY_BIT 0x02000000
+#define     EXPARB_IPRIORITY_BIT 0x01000000
+#define     EXPARB_PAGE_BITS     0x00FF0000
+#define     EXPARB_PAGE_S        16
+#define     EXPARB_BBASE_BITS    0x0000FF00
+#define     EXPARB_BBASE_S       8
+#define     EXPARB_IBASE_BITS    0x000000FF
+#define     EXPARB_IBASE_S       0
+#define EXPAND_TnARBITER_STRIDE  0x00000008
+
+/* These definitions are to be deprecated. See above instead. */
+#define EXPAND_TIMER_DIV   0x03000040
+#define     EXPTIM_DIV_BITS      0x000000FF
+#define     EXPTIM_DIV_S         0
+
+/* These definitions are to be deprecated. See above instead. */
+#define EXPAND_PERFCHAN0   0x03000050
+#define EXPAND_PERFCHAN1   0x03000058
+#define     EXPPERF_CTRL_BITS    0x0000000F
+#define     EXPPERF_CTRL_S       0
+#define         EXPPERF_WRC_WRBURST   0x0   /* Write combiner write burst */
+#define         EXPPERF_WRC_WRITE     0x1   /* Write combiner write       */
+#define         EXPPERF_WRC_RDBURST   0x2   /* Write combiner read burst  */
+#define         EXPPERF_WRC_READ      0x3   /* Write combiner read        */
+#define         EXPPERF_PREARB_DELAY  0x4   /* Pre-arbiter delay cycle    */
+                                           /* Cross-bar hold-off cycle:  */
+#define         EXPPERF_XBAR_HOLDWRAP 0x5   /*    wrapper register        */
+#define         EXPPERF_XBAR_HOLDSBUS 0x6   /*    system bus              */
+#ifdef METAC_1_2
+#define         EXPPERF_XBAR_HOLDLBUS 0x7   /*    local bus               */
+#else /* METAC_1_2 */
+#define         EXPPERF_XBAR_HOLDINTR 0x8   /*    internal ram            */
+#define         EXPPERF_XBAR_HOLDCREG 0x9   /*    core registers          */
+                                           /* Admission delay cycle:     */
+#define         EXPPERF_INPUT_SBUS    0xA   /*    register port           */
+#define         EXPPERF_INPUT_CREG    0xB   /*    core registers          */
+#define         EXPPERF_INPUT_INTR    0xC   /*    internal ram            */
+#define         EXPPERF_INPUT_WRC     0xD   /*    write combiners(memory) */
+#endif /* !METAC_1_2 */
+#define         EXPPERF_MAINARB_DELAY 0xF   /* Main arbiter delay cycle   */
+
+/*
+ * Debug port registers
+ * --------------------------------------
+ */
+
+/* Data Exchange Register */
+#define DBGPORT_MDBGDATAX                    0x0
+
+/* Data Transfer register */
+#define DBGPORT_MDBGDATAT                    0x4
+
+/* Control Register 0 */
+#define DBGPORT_MDBGCTRL0                    0x8
+#define     DBGPORT_MDBGCTRL0_ADDR_BITS      0xFFFFFFFC
+#define     DBGPORT_MDBGCTRL0_ADDR_S         2
+#define     DBGPORT_MDBGCTRL0_AUTOINCR_BIT   0x00000002
+#define     DBGPORT_MDBGCTRL0_RD_BIT         0x00000001
+
+/* Control Register 1 */
+#define DBGPORT_MDBGCTRL1                    0xC
+#ifdef METAC_2_1
+#define    DBGPORT_MDBGCTRL1_DEFERRTHREAD_BITS      0xC0000000
+#define    DBGPORT_MDBGCTRL1_DEFERRTHREAD_S         30
+#endif /* METAC_2_1 */
+#define     DBGPORT_MDBGCTRL1_LOCK2_INTERLOCK_BIT   0x20000000
+#define     DBGPORT_MDBGCTRL1_ATOMIC_INTERLOCK_BIT  0x10000000
+#define     DBGPORT_MDBGCTRL1_TRIGSTATUS_BIT        0x08000000
+#define     DBGPORT_MDBGCTRL1_GBLPORT_IDLE_BIT      0x04000000
+#define     DBGPORT_MDBGCTRL1_COREMEM_IDLE_BIT      0x02000000
+#define     DBGPORT_MDBGCTRL1_READY_BIT             0x01000000
+#ifdef METAC_2_1
+#define     DBGPORT_MDBGCTRL1_DEFERRID_BITS         0x00E00000
+#define     DBGPORT_MDBGCTRL1_DEFERRID_S            21
+#define     DBGPORT_MDBGCTRL1_DEFERR_BIT            0x00100000
+#endif /* METAC_2_1 */
+#define     DBGPORT_MDBGCTRL1_WR_ACTIVE_BIT         0x00040000
+#define     DBGPORT_MDBGCTRL1_COND_LOCK2_BIT        0x00020000
+#define     DBGPORT_MDBGCTRL1_LOCK2_BIT             0x00010000
+#define     DBGPORT_MDBGCTRL1_DIAGNOSE_BIT          0x00008000
+#define     DBGPORT_MDBGCTRL1_FORCEDIAG_BIT         0x00004000
+#define     DBGPORT_MDBGCTRL1_MEMFAULT_BITS         0x00003000
+#define     DBGPORT_MDBGCTRL1_MEMFAULT_S            12
+#define     DBGPORT_MDBGCTRL1_TRIGGER_BIT           0x00000100
+#ifdef METAC_2_1
+#define     DBGPORT_MDBGCTRL1_INTSPECIAL_BIT        0x00000080
+#define     DBGPORT_MDBGCTRL1_INTRUSIVE_BIT         0x00000040
+#endif /* METAC_2_1 */
+#define     DBGPORT_MDBGCTRL1_THREAD_BITS           0x00000030 /* Thread mask selects threads */
+#define     DBGPORT_MDBGCTRL1_THREAD_S              4
+#define     DBGPORT_MDBGCTRL1_TRANS_SIZE_BITS       0x0000000C
+#define     DBGPORT_MDBGCTRL1_TRANS_SIZE_S          2
+#define         DBGPORT_MDBGCTRL1_TRANS_SIZE_32_BIT 0x00000000
+#define         DBGPORT_MDBGCTRL1_TRANS_SIZE_16_BIT 0x00000004
+#define         DBGPORT_MDBGCTRL1_TRANS_SIZE_8_BIT  0x00000008
+#define     DBGPORT_MDBGCTRL1_BYTE_ROUND_BITS       0x00000003
+#define     DBGPORT_MDBGCTRL1_BYTE_ROUND_S          0
+#define         DBGPORT_MDBGCTRL1_BYTE_ROUND_8_BIT  0x00000001
+#define         DBGPORT_MDBGCTRL1_BYTE_ROUND_16_BIT 0x00000002
+
+
+/* L2 Cache registers */
+#define SYSC_L2C_INIT              0x048300C0
+#define SYSC_L2C_INIT_INIT                  1
+#define SYSC_L2C_INIT_IN_PROGRESS           0
+#define SYSC_L2C_INIT_COMPLETE              1
+
+#define SYSC_L2C_ENABLE            0x048300D0
+#define SYSC_L2C_ENABLE_ENABLE_BIT     0x00000001
+#define SYSC_L2C_ENABLE_PFENABLE_BIT   0x00000002
+
+#define SYSC_L2C_PURGE             0x048300C8
+#define SYSC_L2C_PURGE_PURGE                1
+#define SYSC_L2C_PURGE_IN_PROGRESS          0
+#define SYSC_L2C_PURGE_COMPLETE             1
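A minimal sketch of driving the purge register; the volatile accesses and the simple polling loop are assumptions for illustration:

/* Purge the L2 cache and spin until the operation reports complete. */
static void metag_l2c_purge(void)
{
        volatile unsigned int *purge = (volatile unsigned int *)SYSC_L2C_PURGE;

        *purge = SYSC_L2C_PURGE_PURGE;
        while (*purge == SYSC_L2C_PURGE_IN_PROGRESS)
                ;
}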
+
+#endif /* _ASM_METAG_MEM_H_ */
diff --git a/arch/metag/include/asm/metag_regs.h b/arch/metag/include/asm/metag_regs.h
new file mode 100644 (file)
index 0000000..acf4b8e
--- /dev/null
@@ -0,0 +1,1184 @@
+/*
+ * asm/metag_regs.h
+ *
+ * Copyright (C) 2000-2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Various defines for Meta core (non memory-mapped) registers.
+ */
+
+#ifndef _ASM_METAG_REGS_H_
+#define _ASM_METAG_REGS_H_
+
+/*
+ * CHIP Unit Identifiers and Valid/Global register number masks
+ * ------------------------------------------------------------
+ */
+#define TXUCT_ID    0x0     /* Control unit regs */
+#ifdef METAC_1_2
+#define     TXUCT_MASK  0xFF0FFFFF  /* Valid regs 0..31  */
+#else
+#define     TXUCT_MASK  0xFF1FFFFF  /* Valid regs 0..31  */
+#endif
+#define     TGUCT_MASK  0x00000000  /* No global regs    */
+#define TXUD0_ID    0x1     /* Data unit regs */
+#define TXUD1_ID    0x2
+#define     TXUDX_MASK  0xFFFFFFFF  /* Valid regs 0..31 */
+#define     TGUDX_MASK  0xFFFF0000  /* Global regs for base inst */
+#define     TXUDXDSP_MASK   0x0F0FFFFF  /* Valid DSP regs */
+#define     TGUDXDSP_MASK   0x0E0E0000  /* Global DSP ACC regs */
+#define TXUA0_ID    0x3     /* Address unit regs */
+#define TXUA1_ID    0x4
+#define     TXUAX_MASK  0x0000FFFF  /* Valid regs   0-15 */
+#define     TGUAX_MASK  0x0000FF00  /* Global regs  8-15 */
+#define TXUPC_ID    0x5     /* PC registers */
+#define     TXUPC_MASK  0x00000003  /* Valid regs   0- 1 */
+#define     TGUPC_MASK  0x00000000  /* No global regs    */
+#define TXUPORT_ID  0x6     /* Ports are not registers */
+#define TXUTR_ID    0x7
+#define     TXUTR_MASK  0x0000005F  /* Valid regs   0-3,4,6 */
+#define     TGUTR_MASK  0x00000000  /* No global regs    */
+#ifdef METAC_2_1
+#define TXUTT_ID    0x8
+#define     TXUTT_MASK  0x0000000F  /* Valid regs   0-3 */
+#define     TGUTT_MASK  0x00000010  /* Global reg   4   */
+#define TXUFP_ID    0x9     /* FPU regs */
+#define     TXUFP_MASK  0x0000FFFF  /* Valid regs   0-15 */
+#define     TGUFP_MASK  0x00000000  /* No global regs    */
+#endif /* METAC_2_1 */
+
+#ifdef METAC_1_2
+#define TXUXX_MASKS { TXUCT_MASK, TXUDX_MASK, TXUDX_MASK, TXUAX_MASK, \
+                     TXUAX_MASK, TXUPC_MASK,          0, TXUTR_MASK, \
+                     0, 0, 0, 0, 0, 0, 0, 0                          }
+#define TGUXX_MASKS { TGUCT_MASK, TGUDX_MASK, TGUDX_MASK, TGUAX_MASK, \
+                     TGUAX_MASK, TGUPC_MASK,          0, TGUTR_MASK, \
+                     0, 0, 0, 0, 0, 0, 0, 0                          }
+#else /* METAC_1_2 */
+#define TXUXX_MASKS { TXUCT_MASK, TXUDX_MASK, TXUDX_MASK, TXUAX_MASK, \
+                     TXUAX_MASK, TXUPC_MASK,          0, TXUTR_MASK, \
+                     TXUTT_MASK, TXUFP_MASK,          0,          0, \
+                              0,          0,          0,          0  }
+#define TGUXX_MASKS { TGUCT_MASK, TGUDX_MASK, TGUDX_MASK, TGUAX_MASK, \
+                     TGUAX_MASK, TGUPC_MASK,          0, TGUTR_MASK, \
+                     TGUTT_MASK, TGUFP_MASK,          0,          0, \
+                              0,          0,          0,          0  }
+#endif /* !METAC_1_2 */
+
+#define TXUXXDSP_MASKS { 0, TXUDXDSP_MASK, TXUDXDSP_MASK, 0, 0, 0, 0, 0, \
+                        0, 0, 0, 0, 0, 0, 0, 0                          }
+#define TGUXXDSP_MASKS { 0, TGUDXDSP_MASK, TGUDXDSP_MASK, 0, 0, 0, 0, 0, \
+                        0, 0, 0, 0, 0, 0, 0, 0                          }
+
+/* -------------------------------------------------------------------------
+;                          DATA AND ADDRESS UNIT REGISTERS
+;  -----------------------------------------------------------------------*/
+/*
+  Thread local D0 registers
+ */
+/*   D0.0    ; Holds 32-bit result, can be used as scratch */
+#define D0Re0 D0.0
+/*   D0.1    ; Used to pass Arg6_32 */
+#define D0Ar6 D0.1
+/*   D0.2    ; Used to pass Arg4_32 */
+#define D0Ar4 D0.2
+/*   D0.3    ; Used to pass Arg2_32 to a called routine (see D1.3 below) */
+#define D0Ar2 D0.3
+/*   D0.4    ; Can be used as scratch; used to save A0FrP in entry sequences */
+#define D0FrT D0.4
+/*   D0.5    ; C compiler assumes preservation, save with D1.5 if used */
+/*   D0.6    ; C compiler assumes preservation, save with D1.6 if used */
+/*   D0.7    ; C compiler assumes preservation, save with D1.7 if used */
+/*   D0.8    ; Use of D0.8 and above is not encouraged */
+/*   D0.9  */
+/*   D0.10 */
+/*   D0.11 */
+/*   D0.12 */
+/*   D0.13 */
+/*   D0.14 */
+/*   D0.15 */
+/*
+   Thread local D1 registers
+ */
+/*   D1.0    ; Holds top 32-bits of 64-bit result, can be used as scratch */
+#define D1Re0 D1.0
+/*   D1.1    ; Used to pass Arg5_32 */
+#define D1Ar5 D1.1
+/*   D1.2    ; Used to pass Arg3_32 */
+#define D1Ar3 D1.2
+/*   D1.3    ; Used to pass Arg1_32 (first 32-bit argument) to a called routine */
+#define D1Ar1 D1.3
+/*   D1.4    ; Used for Return Pointer, save during entry with A0FrP (via D0.4) */
+#define D1RtP D1.4
+/*   D1.5    ; C compiler assumes preservation, save if used */
+/*   D1.6    ; C compiler assumes preservation, save if used */
+/*   D1.7    ; C compiler assumes preservation, save if used */
+/*   D1.8    ; Use of D1.8 and above is not encouraged */
+/*   D1.9  */
+/*   D1.10 */
+/*   D1.11 */
+/*   D1.12 */
+/*   D1.13 */
+/*   D1.14 */
+/*   D1.15 */
+/*
+   Thread local A0 registers
+ */
+/*   A0.0    ; Primary stack pointer */
+#define A0StP A0.0
+/*   A0.1    ; Used as local frame pointer in C, save if used (via D0.4) */
+#define A0FrP A0.1
+/*   A0.2  */
+/*   A0.3  */
+/*   A0.4    ; Use of A0.4 and above is not encouraged */
+/*   A0.5  */
+/*   A0.6  */
+/*   A0.7  */
+/*
+   Thread local A1 registers
+ */
+/*   A1.0    ; Global static chain pointer - do not modify */
+#define A1GbP A1.0
+/*   A1.1    ; Local static chain pointer in C, can be used as scratch */
+#define A1LbP A1.1
+/*   A1.2  */
+/*   A1.3  */
+/*   A1.4    ; Use of A1.4 and above is not encouraged */
+/*   A1.5  */
+/*   A1.6  */
+/*   A1.7  */
+#ifdef METAC_2_1
+/* Renameable registers for use with Fast Interrupts */
+/* The interrupt stack pointer (usually a global register) */
+#define A0IStP A0IReg
+/* The interrupt global pointer (usually a global register) */
+#define A1IGbP A1IReg
+#endif
+/*
+   Further registers may be globally allocated via linkage/loading tools,
+   normally they are not used.
+ */
+/*-------------------------------------------------------------------------
+;                    STACK STRUCTURE and CALLING CONVENTION
+; -----------------------------------------------------------------------*/
+/*
+; Calling convention indicates that the following is the state of the
+; stack frame at the start of a routine-
+;
+;       Arg9_32 [A0StP+#-12]
+;       Arg8_32 [A0StP+#- 8]
+;       Arg7_32 [A0StP+#- 4]
+;   A0StP->
+;
+; Registers D1.3, D0.3, ..., to D0.1 are used to pass Arg1_32 to Arg6_32
+;   respectively. If a routine needs to store them on the stack in order
+;   to make sub-calls or because of the general complexity of the routine it
+;   is best to dump these registers immediately at the start of a routine
+;   using a MSETL or SETL instruction-
+;
+;   MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2; Only dump arguments expected
+;or SETL    [A0StP+#8++],D0Ar2       ; Up to two 32-bit args expected
+;
+; For non-leaf routines it is always necessary to save and restore at least
+; the return address value D1RtP on the stack. Also by convention if the
+; frame is saved then a new A0FrP value must be set-up. So for non-leaf
+; routines at this point both these registers must be saved onto the stack
+; using a SETL instruction and the new A0FrP value is then set-up-
+;
+;   MOV     D0FrT,A0FrP
+;   ADD     A0FrP,A0StP,#0
+;   SETL    [A0StP+#8++],D0FrT,D1RtP
+;
+; Registers D0.5, D1.5, to D1.7 are assumed to be preserved across calls so
+;   a SETL or MSETL instruction can be used to save the current state
+;   of these registers if they are modified by the current routine-
+;
+;   MSETL   [A0StP],D0.5,D0.6,D0.7   ; Only save registers modified
+;or SETL    [A0StP+#8++],D0.5        ; Only D0.5 and/or D1.5 modified
+;
+; All of the above sequences can be combined into one maximal case-
+;
+;   MOV     D0FrT,A0FrP              ; Save and calculate new frame pointer
+;   ADD     A0FrP,A0StP,#(ARS)
+;   MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+;
+; Having completed the above sequence the only remaining task on routine
+; entry is to reserve any local and outgoing argument storage space on the
+; stack. This instruction may be omitted if the size of this region is zero-
+;
+;   ADD     A0StP,A0StP,#(LCS)
+;
+; LCS is the first example use of one of a number of standard local defined
+; values that can be created to make assembler code more readable and
+; potentially more robust-
+;
+; #define ARS   0x18                 ; Register arg bytes saved on stack
+; #define FRS   0x20                 ; Frame save area size in bytes
+; #define LCS   0x00                 ; Locals and Outgoing arg size
+; #define ARO   (LCS+FRS)            ; Stack offset to access args
+;
+; All of the above defines should be undefined (#undef) at the end of each
+; routine to avoid accidental use in the next routine.
+;
+; Given all of the above the following stack structure is expected during
+; the body of a routine if all args passed in registers are saved during
+; entry-
+;
+;                                    ; 'Incoming args area'
+;         Arg10_32 [A0StP+#-((10*4)+ARO)]       Arg9_32  [A0StP+#-(( 9*4)+ARO)]
+;         Arg8_32  [A0StP+#-(( 8*4)+ARO)]       Arg7_32  [A0StP+#-(( 7*4)+ARO)]
+;--- Call point
+; D0Ar6=  Arg6_32  [A0StP+#-(( 6*4)+ARO)] D1Ar5=Arg5_32  [A0StP+#-(( 5*4)+ARO)]
+; D0Ar4=  Arg4_32  [A0StP+#-(( 4*4)+ARO)] D1Ar3=Arg3_32  [A0StP+#-(( 3*4)+ARO)]
+; D0Ar2=  Arg2_32  [A0StP+#-(( 2*4)+ARO)] D1Ar1=Arg1_32  [A0StP+#-(( 1*4)+ARO)]
+;                                    ; 'Frame area'
+; A0FrP-> D0FrT, D1RtP,
+;         D0.5, D1.5,
+;         D0.6, D1.6,
+;         D0.7, D1.7,
+;                                    ; 'Locals area'
+;         Loc0_32  [A0StP+# (( 0*4)-LCS)],      Loc1_32 [A0StP+# (( 1*4)-LCS)]
+;               .... other locals
+;         Locn_32  [A0StP+# (( n*4)-LCS)]
+;                                    ; 'Outgoing args area'
+;         Outm_32  [A0StP+#- ( m*4)]            .... other outgoing args
+;         Out8_32  [A0StP+#- ( 2*4)]            Out7_32  [A0StP+#- ( 1*4)]
+; A0StP-> (Out1_32-Out6_32 in regs D1Ar1-D0Ar6)
+;
+; The exit sequence for a non-leaf routine can use the frame pointer created
+; in the entry sequence to optimise the recovery of the full state-
+;
+;   MGETL   D0FrT,D0.5,D0.6,D0.7,[A0FrP]
+;   SUB     A0StP,A0FrP,#(ARS+FRS)
+;   MOV     A0FrP,D0FrT
+;   MOV     PC,D1RtP
+;
+; Having described the most complex non-leaf case above, it is worth noting
+; that if a routine is a leaf and does not use any of the caller-preserved
+; state, the routine can be implemented as-
+;
+;   ADD     A0StP,A0StP,#LCS
+;   .... body of routine
+;   SUB     A0StP,A0StP,#LCS
+;   MOV     PC,D1RtP
+;
+; The stack adjustments can also be omitted if no local storage is required.
+;
+; Another exit sequence structure is more applicable for a leaf routine
+; with no local frame pointer saved/generated, in which the call-saved
+; registers need to be saved and restored-
+;
+;   MSETL   [A0StP],D0.5,D0.6,D0.7   ; Hence FRS is 0x18, ARS is 0x00
+;   ADD     A0StP,A0StP,#LCS
+;   .... body of routine
+;   GETL    D0.5,D1.5,[A0StP+#((0*8)-(FRS+LCS))]
+;   GETL    D0.6,D1.6,[A0StP+#((1*8)-(FRS+LCS))]
+;   GETL    D0.7,D1.7,[A0StP+#((2*8)-(FRS+LCS))]
+;   SUB     A0StP,A0StP,#(ARS+FRS+LCS)
+;   MOV     PC,D1RtP
+;
+; Lastly, to support profiling, assembler code should use a fixed entry/exit
+; sequence if the trigger define _GMON_ASM is defined-
+;
+;   #ifndef _GMON_ASM
+;   ... optimised entry code
+;   #else
+;   ; Profiling entry case
+;   MOV     D0FrT,A0FrP              ; Save and calculate new frame pointer
+;   ADD     A0FrP,A0StP,#(ARS)
+;   MSETL   [A0StP],...,D0FrT,... or SETL    [A0FrP],D0FrT,D1RtP
+;   CALLR   D0FrT,_mcount_wrapper
+;   #endif
+;   ... body of routine
+;   #ifndef _GMON_ASM
+;   ... optimised exit code
+;   #else
+;   ; Profiling exit case
+;   MGETL   D0FrT,...,[A0FrP]     or GETL    D0FrT,D1RtP,[A0FrP++]
+;   SUB     A0StP,A0FrP,#(ARS+FRS)
+;   MOV     A0FrP,D0FrT
+;   MOV     PC,D1RtP
+;   #endif
+
+
+; -------------------------------------------------------------------------
+;                         CONTROL UNIT REGISTERS
+; -------------------------------------------------------------------------
+;
+; See the assembler guide, hardware documentation, or the field values
+; defined below for some details of the use of these registers.
+*/
+#define TXENABLE    CT.0    /* Need to define bit-field values in these */
+#define TXMODE      CT.1
+#define TXSTATUS    CT.2    /* DEFAULT 0x00020000 */
+#define TXRPT       CT.3
+#define TXTIMER     CT.4
+#define TXL1START   CT.5
+#define TXL1END     CT.6
+#define TXL1COUNT   CT.7
+#define TXL2START   CT.8
+#define TXL2END     CT.9
+#define TXL2COUNT   CT.10
+#define TXBPOBITS   CT.11
+#define TXMRSIZE    CT.12
+#define TXTIMERI    CT.13
+#define TXDRCTRL    CT.14  /* DEFAULT 0x0XXXF0F0 */
+#define TXDRSIZE    CT.15
+#define TXCATCH0    CT.16
+#define TXCATCH1    CT.17
+#define TXCATCH2    CT.18
+#define TXCATCH3    CT.19
+
+#ifdef METAC_2_1
+#define TXDEFR      CT.20
+#define TXCPRS      CT.21
+#endif
+
+#define TXINTERN0   CT.23
+#define TXAMAREG0   CT.24
+#define TXAMAREG1   CT.25
+#define TXAMAREG2   CT.26
+#define TXAMAREG3   CT.27
+#define TXDIVTIME   CT.28   /* DEFAULT 0x00000001 */
+#define TXPRIVEXT   CT.29   /* DEFAULT 0x003B0000 */
+#define TXTACTCYC   CT.30
+#define TXIDLECYC   CT.31
+
+/*****************************************************************************
+ *                        CONTROL UNIT REGISTER BITS
+ ****************************************************************************/
+/*
+ * The following registers, and where appropriate the sub-fields of those
+ * registers, are defined for pervasive use in controlling program flow.
+ */
+
+/*
+ * TXENABLE register fields - only the thread id is routinely useful
+ */
+#define TXENABLE_REGNUM 0
+#define TXENABLE_THREAD_BITS       0x00000700
+#define TXENABLE_THREAD_S          8
+#define TXENABLE_REV_STEP_BITS     0x000000F0
+#define TXENABLE_REV_STEP_S        4
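A one-line sketch of the routine use mentioned above; the helper name is an assumption, and reading TXENABLE itself normally needs a small assembler stub:

/* Hardware thread number from a previously read TXENABLE value. */
static unsigned int metag_thread_id(unsigned int txenable)
{
        return (txenable & TXENABLE_THREAD_BITS) >> TXENABLE_THREAD_S;
}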
+
+/*
+ * TXMODE register - controls extensions of the instruction set
+ */
+#define TXMODE_REGNUM 1
+#define     TXMODE_DEFAULT  0   /* All fields default to zero */
+
+/*
+ * TXSTATUS register - contains a couple of stable bits that can be used
+ *      to determine the privilege processing level and interrupt
+ *      processing level of the current thread.
+ */
+#define TXSTATUS_REGNUM 2
+#define TXSTATUS_PSTAT_BIT         0x00020000   /* -> Privilege active      */
+#define TXSTATUS_PSTAT_S           17
+#define TXSTATUS_ISTAT_BIT         0x00010000   /* -> In interrupt state    */
+#define TXSTATUS_ISTAT_S           16
+
+/*
+ * These are all relatively boring registers, mostly full 32-bit
+ */
+#define TXRPT_REGNUM     3  /* Repeat counter for XFR... instructions   */
+#define TXTIMER_REGNUM   4  /* Timer-- causes timer trigger on overflow */
+#define TXL1START_REGNUM 5  /* Hardware Loop 1 Start-PC/End-PC/Count    */
+#define TXL1END_REGNUM   6
+#define TXL1COUNT_REGNUM 7
+#define TXL2START_REGNUM 8  /* Hardware Loop 2 Start-PC/End-PC/Count    */
+#define TXL2END_REGNUM   9
+#define TXL2COUNT_REGNUM 10
+#define TXBPOBITS_REGNUM 11 /* Branch predict override bits - tune perf */
+#define TXTIMERI_REGNUM  13 /* Timer-- time based interrupt trigger     */
+
+/*
+ * TXDIVTIME register is routinely read to calculate the time-base for
+ * the TXTIMER register.
+ */
+#define TXDIVTIME_REGNUM 28
+#define     TXDIVTIME_DIV_BITS 0x000000FF
+#define     TXDIVTIME_DIV_S    0
+#define     TXDIVTIME_DIV_MIN  0x00000001   /* Maximum resolution       */
+#define     TXDIVTIME_DIV_MAX  0x00000100   /* 1/1 -> 1/256 resolution  */
+#define     TXDIVTIME_BASE_HZ  1000000      /* Timers run at 1MHz @1/1  */
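A hedged sketch of the time-base calculation; how the 8-bit DIV field encodes the full 1..256 range is not spelt out here, so the argument is taken to be the already-decoded divide ratio:

/* Effective TXTIMER tick rate for a given divide ratio (1..256). */
static unsigned int metag_timer_hz(unsigned int ratio)
{
        return TXDIVTIME_BASE_HZ / ratio;   /* 1MHz at 1/1, ~3.9kHz at 1/256 */
}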
+
+/*
+ * TXPRIVEXT register can be consulted to decide if write access to a
+ *    part of the thread's register set is not permitted when in
+ *    unprivileged mode (PSTAT == 0).
+ */
+#define TXPRIVEXT_REGNUM 29
+#define     TXPRIVEXT_COPRO_BITS    0xFF000000 /* Co-processor 0-7 */
+#define     TXPRIVEXT_COPRO_S       24
+#ifndef METAC_1_2
+#define     TXPRIVEXT_TXTIMER_BIT   0x00080000 /* TXTIMER   priv */
+#define     TXPRIVEXT_TRACE_BIT     0x00040000 /* TTEXEC|TTCTRL|GTEXEC */
+#endif
+#define     TXPRIVEXT_TXTRIGGER_BIT 0x00020000 /* TXSTAT|TXMASK|TXPOLL */
+#define     TXPRIVEXT_TXGBLCREG_BIT 0x00010000 /* Global common regs */
+#define     TXPRIVEXT_CBPRIV_BIT    0x00008000 /* Mem i/f dump priv */
+#define     TXPRIVEXT_ILOCK_BIT     0x00004000 /* LOCK inst priv */
+#define     TXPRIVEXT_TXITACCYC_BIT 0x00002000 /* TXIDLECYC|TXTACTCYC */
+#define     TXPRIVEXT_TXDIVTIME_BIT 0x00001000 /* TXDIVTIME priv */
+#define     TXPRIVEXT_TXAMAREGX_BIT 0x00000800 /* TXAMAREGX priv */
+#define     TXPRIVEXT_TXTIMERI_BIT  0x00000400 /* TXTIMERI  priv */
+#define     TXPRIVEXT_TXSTATUS_BIT  0x00000200 /* TXSTATUS  priv */
+#define     TXPRIVEXT_TXDISABLE_BIT 0x00000100 /* TXENABLE  priv */
+#ifndef METAC_1_2
+#define     TXPRIVEXT_MINIMON_BIT   0x00000080 /* Enable Minim features */
+#define     TXPRIVEXT_OLDBCCON_BIT  0x00000020 /* Restore Static predictions */
+#define     TXPRIVEXT_ALIGNREW_BIT  0x00000010 /* Align & precise checks */
+#endif
+#define     TXPRIVEXT_KEEPPRI_BIT   0x00000008 /* Use AMA_Priority if ISTAT=1*/
+#define     TXPRIVEXT_TXTOGGLEI_BIT 0x00000001 /* TX.....I  priv */
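A short sketch of the kind of check described above; the helper name and the choice of TXDIVTIME as the example register are assumptions:

/* Non-zero if unprivileged (PSTAT == 0) code may write TXDIVTIME. */
static int metag_txdivtime_user_writable(unsigned int txprivext)
{
        /* bit set -> privilege required -> not writable from user mode */
        return !(txprivext & TXPRIVEXT_TXDIVTIME_BIT);
}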
+
+/*
+ * TXTACTCYC register - counts instructions issued for this thread
+ */
+#define TXTACTCYC_REGNUM  30
+#define     TXTACTCYC_COUNT_MASK    0x00FFFFFF
+
+/*
+ * TXIDLECYC register - counts idle cycles
+ */
+#define TXIDLECYC_REGNUM  31
+#define     TXIDLECYC_COUNT_MASK    0x00FFFFFF
+
+/*****************************************************************************
+ *                             DSP EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following values relate to fields and controls that only a program
+ * using the DSP extensions of the META instruction set needs to know.
+ */
+
+
+#ifndef METAC_1_2
+/*
+ * Allow co-processor hardware to replace the read pipeline data source in
+ * appropriate cases.
+ */
+#define TXMODE_RDCPEN_BIT       0x00800000
+#endif
+
+/*
+ * Address unit addressing modes
+ */
+#define TXMODE_A1ADDR_BITS  0x00007000
+#define TXMODE_A1ADDR_S     12
+#define TXMODE_A0ADDR_BITS  0x00000700
+#define TXMODE_A0ADDR_S     8
+#define     TXMODE_AXADDR_MODULO 3
+#define     TXMODE_AXADDR_REVB   4
+#define     TXMODE_AXADDR_REVW   5
+#define     TXMODE_AXADDR_REVD   6
+#define     TXMODE_AXADDR_REVL   7
+
+/*
+ * Data unit OverScale select (default 0 -> normal, 1 -> top 16 bits)
+ */
+#define TXMODE_DXOVERSCALE_BIT  0x00000080
+
+/*
+ * Data unit MX mode select (default 0 -> MX16, 1 -> MX8)
+ */
+#define TXMODE_M8_BIT         0x00000040
+
+/*
+ * Data unit accumulator saturation point (default -> 40 bit accumulator)
+ */
+#define TXMODE_DXACCSAT_BIT 0x00000020 /* Set for 32-bit accumulator */
+
+/*
+ * Data unit accumulator saturation enable (default 0 -> no saturation)
+ */
+#define TXMODE_DXSAT_BIT    0x00000010
+
+/*
+ * Data unit master rounding control (default 0 -> normal, 1 -> convergent)
+ */
+#define TXMODE_DXROUNDING_BIT   0x00000008
+
+/*
+ * Data unit product shift for fractional arithmetic (default off)
+ */
+#define TXMODE_DXPRODSHIFT_BIT  0x00000004
+
+/*
+ * Select the arithmetic mode (multiply mostly) for both data units
+ */
+#define TXMODE_DXARITH_BITS 0x00000003
+#define     TXMODE_DXARITH_32  3
+#define     TXMODE_DXARITH_32H 2
+#define     TXMODE_DXARITH_S16 1
+#define     TXMODE_DXARITH_16  0
+
+/*
+ * TXMRSIZE register value is only relevant when DSP modulo addressing is active
+ */
+#define TXMRSIZE_REGNUM 12
+#define     TXMRSIZE_MIN    0x0002  /* 0, 1 -> normal addressing logic */
+#define     TXMRSIZE_MAX    0xFFFF
+
+/*
+ * TXDRCTRL register can be used to detect the actual size of the DSP RAM
+ * partitions allocated to this thread.
+ */
+#define TXDRCTRL_REGNUM 14
+#define     TXDRCTRL_SINESIZE_BITS  0x0F000000
+#define     TXDRCTRL_SINESIZE_S     24
+#define     TXDRCTRL_RAMSZPOW_BITS  0x001F0000  /* Limit = (1<<RAMSZPOW)-1 */
+#define     TXDRCTRL_RAMSZPOW_S     16
+#define     TXDRCTRL_D1RSZAND_BITS  0x0000F000  /* Mask top 4 bits - D1 */
+#define     TXDRCTRL_D1RSZAND_S     12
+#define     TXDRCTRL_D0RSZAND_BITS  0x000000F0  /* Mask top 4 bits - D0 */
+#define     TXDRCTRL_D0RSZAND_S     4
+/* Given extracted RAMSZPOW and DnRSZAND fields this returns the size */
+#define     TXDRCTRL_DXSIZE(Pow, AndBits) \
+                               ((((~(AndBits)) & 0x0f) + 1) << ((Pow)-4))
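A brief sketch of using the TXDRCTRL_DXSIZE() helper defined above on a previously read TXDRCTRL value (the function name is an assumption):

/* Size of this thread's D0 DSP RAM partition, as given by TXDRCTRL_DXSIZE(). */
static unsigned int metag_d0_dspram_size(unsigned int txdrctrl)
{
        unsigned int pow     = (txdrctrl & TXDRCTRL_RAMSZPOW_BITS) >> TXDRCTRL_RAMSZPOW_S;
        unsigned int andbits = (txdrctrl & TXDRCTRL_D0RSZAND_BITS) >> TXDRCTRL_D0RSZAND_S;

        return TXDRCTRL_DXSIZE(pow, andbits);
}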
+
+/*
+ * TXDRSIZE register provides modulo addressing options for each DSP RAM
+ */
+#define TXDRSIZE_REGNUM 15
+#define     TXDRSIZE_R1MOD_BITS       0xFFFF0000
+#define     TXDRSIZE_R1MOD_S          16
+#define     TXDRSIZE_R0MOD_BITS       0x0000FFFF
+#define     TXDRSIZE_R0MOD_S          0
+
+#define     TXDRSIZE_RBRAD_SCALE_BITS 0x70000000
+#define     TXDRSIZE_RBRAD_SCALE_S    28
+#define     TXDRSIZE_RBMODSIZE_BITS   0x0FFF0000
+#define     TXDRSIZE_RBMODSIZE_S      16
+#define     TXDRSIZE_RARAD_SCALE_BITS 0x00007000
+#define     TXDRSIZE_RARAD_SCALE_S    12
+#define     TXDRSIZE_RAMODSIZE_BITS   0x00000FFF
+#define     TXDRSIZE_RAMODSIZE_S      0
+
+/*****************************************************************************
+ *                       DEFERRED and BUS ERROR EXTENSION
+ ****************************************************************************/
+
+/*
+ * TXDEFR register - Deferred exception control
+ */
+#define TXDEFR_REGNUM 20
+#define     TXDEFR_DEFAULT  0   /* All fields default to zero */
+
+/*
+ * Bus error state is a multi-bit positive/negative event notification from
+ * the bus infrastructure.
+ */
+#define     TXDEFR_BUS_ERR_BIT    0x80000000  /* Set if error (LSB STATE) */
+#define     TXDEFR_BUS_ERRI_BIT   0x40000000  /* Fetch returned error */
+#define     TXDEFR_BUS_STATE_BITS 0x3F000000  /* Bus event/state data */
+#define     TXDEFR_BUS_STATE_S    24
+#define     TXDEFR_BUS_TRIG_BIT   0x00800000  /* Set when bus error seen */
+
+/*
+ * Bus events are collected by background code in a deferred manner unless
+ * selected to trigger an extended interrupt HALT trigger when they occur.
+ */
+#define     TXDEFR_BUS_ICTRL_BIT  0x00000080  /* Enable interrupt trigger */
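A small sketch of inspecting deferred bus error state in a previously read TXDEFR value (the helper name is an assumption):

/* Non-zero if a bus event has been seen and it reported an error. */
static int metag_bus_error_pending(unsigned int txdefr)
{
        return (txdefr & TXDEFR_BUS_TRIG_BIT) &&
               (txdefr & TXDEFR_BUS_ERR_BIT);
}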
+
+/*
+ * CHIP Automatic Mips Allocation control registers
+ * ------------------------------------------------
+ */
+
+/* CT Bank AMA Registers */
+#define TXAMAREG0_REGNUM 24
+#ifdef METAC_1_2
+#define     TXAMAREG0_CTRL_BITS       0x07000000
+#else /* METAC_1_2 */
+#define     TXAMAREG0_RCOFF_BIT       0x08000000
+#define     TXAMAREG0_DLINEHLT_BIT    0x04000000
+#define     TXAMAREG0_DLINEDIS_BIT    0x02000000
+#define     TXAMAREG0_CYCSTRICT_BIT   0x01000000
+#define     TXAMAREG0_CTRL_BITS       (TXAMAREG0_RCOFF_BIT |    \
+                                      TXAMAREG0_DLINEHLT_BIT | \
+                                      TXAMAREG0_DLINEDIS_BIT | \
+                                      TXAMAREG0_CYCSTRICT_BIT)
+#endif /* !METAC_1_2 */
+#define     TXAMAREG0_CTRL_S           24
+#define     TXAMAREG0_MDM_BIT         0x00400000
+#define     TXAMAREG0_MPF_BIT         0x00200000
+#define     TXAMAREG0_MPE_BIT         0x00100000
+#define     TXAMAREG0_MASK_BITS       (TXAMAREG0_MDM_BIT | \
+                                      TXAMAREG0_MPF_BIT | \
+                                      TXAMAREG0_MPE_BIT)
+#define     TXAMAREG0_MASK_S          20
+#define     TXAMAREG0_SDM_BIT         0x00040000
+#define     TXAMAREG0_SPF_BIT         0x00020000
+#define     TXAMAREG0_SPE_BIT         0x00010000
+#define     TXAMAREG0_STATUS_BITS     (TXAMAREG0_SDM_BIT | \
+                                      TXAMAREG0_SPF_BIT | \
+                                      TXAMAREG0_SPE_BIT)
+#define     TXAMAREG0_STATUS_S        16
+#define     TXAMAREG0_PRIORITY_BITS   0x0000FF00
+#define     TXAMAREG0_PRIORITY_S      8
+#define     TXAMAREG0_BVALUE_BITS     0x000000FF
+#define     TXAMAREG0_BVALUE_S  0
+
+#define TXAMAREG1_REGNUM 25
+#define     TXAMAREG1_DELAYC_BITS     0x07FFFFFF
+#define     TXAMAREG1_DELAYC_S  0
+
+#define TXAMAREG2_REGNUM 26
+#ifdef METAC_1_2
+#define     TXAMAREG2_DLINEC_BITS     0x00FFFFFF
+#define     TXAMAREG2_DLINEC_S        0
+#else /* METAC_1_2 */
+#define     TXAMAREG2_IRQPRIORITY_BIT 0xFF000000
+#define     TXAMAREG2_IRQPRIORITY_S   24
+#define     TXAMAREG2_DLINEC_BITS     0x00FFFFF0
+#define     TXAMAREG2_DLINEC_S        4
+#endif /* !METAC_1_2 */
+
+#define TXAMAREG3_REGNUM 27
+#define     TXAMAREG2_AMABLOCK_BIT    0x00080000
+#define     TXAMAREG2_AMAC_BITS       0x0000FFFF
+#define     TXAMAREG2_AMAC_S          0
+
+/*****************************************************************************
+ *                                FPU EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following registers only exist in FPU enabled cores.
+ */
+
+/*
+ * TXMODE register - FPU rounding mode control/status fields
+ */
+#define     TXMODE_FPURMODE_BITS     0x00030000
+#define     TXMODE_FPURMODE_S        16
+#define     TXMODE_FPURMODEWRITE_BIT 0x00040000  /* Set to change FPURMODE */
+
+/*
+ * TXDEFR register - FPU exception handling/state is a significant source
+ *   of deferrable errors. Run-time S/W can move handling to interrupt level
+ *   using the DEFR instruction to collect state.
+ */
+#define     TXDEFR_FPE_FE_BITS       0x003F0000  /* Set by FPU_FE events */
+#define     TXDEFR_FPE_FE_S          16
+
+#define     TXDEFR_FPE_INEXACT_FE_BIT   0x010000
+#define     TXDEFR_FPE_UNDERFLOW_FE_BIT 0x020000
+#define     TXDEFR_FPE_OVERFLOW_FE_BIT  0x040000
+#define     TXDEFR_FPE_DIVBYZERO_FE_BIT 0x080000
+#define     TXDEFR_FPE_INVALID_FE_BIT   0x100000
+#define     TXDEFR_FPE_DENORMAL_FE_BIT  0x200000
+
+#define     TXDEFR_FPE_ICTRL_BITS    0x0000003F  /* Route to interrupts */
+#define     TXDEFR_FPE_ICTRL_S       0
+
+#define     TXDEFR_FPE_INEXACT_ICTRL_BIT   0x01
+#define     TXDEFR_FPE_UNDERFLOW_ICTRL_BIT 0x02
+#define     TXDEFR_FPE_OVERFLOW_ICTRL_BIT  0x04
+#define     TXDEFR_FPE_DIVBYZERO_ICTRL_BIT 0x08
+#define     TXDEFR_FPE_INVALID_ICTRL_BIT   0x10
+#define     TXDEFR_FPE_DENORMAL_ICTRL_BIT  0x20
+
+/*
+ * DETAILED FPU RELATED VALUES
+ * ---------------------------
+ */
+
+/*
+ * Rounding mode field in TXMODE can hold a number of logical values
+ */
+#define METAG_FPURMODE_TONEAREST  0x0      /* Default */
+#define METAG_FPURMODE_TOWARDZERO 0x1
+#define METAG_FPURMODE_UPWARD     0x2
+#define METAG_FPURMODE_DOWNWARD   0x3
+
+/*
+ * To set the TXMODE register field that controls the rounding mode, an extra
+ * bit must be set in the value written (beyond what is read back) in order to
+ * gate writes to the rounding mode field. This allows other non-FPU code to
+ * modify TXMODE without knowing whether an FPU unit is present and without
+ * influencing the FPU rounding mode. This macro adds the required bit so a
+ * new rounding mode is accepted.
+ */
+#define TXMODE_FPURMODE_SET(FPURMode) \
+       (TXMODE_FPURMODEWRITE_BIT + ((FPURMode)<<TXMODE_FPURMODE_S))
+
+/*
+ * To successfully restore TXMODE to zero at the end of the function the
+ * following value (rather than zero) must be used.
+ */
+#define TXMODE_FPURMODE_RESET (TXMODE_FPURMODEWRITE_BIT)
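+
+/*
+ * Illustrative usage sketch (assumes a hypothetical txmode_write() helper
+ * for the TXMODE control register; it is not defined here): selecting
+ * round-toward-zero and later restoring the default rounding mode might
+ * look like
+ *
+ *     txmode_write(TXMODE_FPURMODE_SET(METAG_FPURMODE_TOWARDZERO));
+ *     ...
+ *     txmode_write(TXMODE_FPURMODE_RESET);
+ *
+ * A value written without TXMODE_FPURMODEWRITE_BIT leaves the rounding
+ * mode field unchanged.
+ */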
+
+/*
+ * In TXSTATUS a special bit exists to indicate if FPU H/W has been accessed
+ * since it was last reset.
+ */
+#define TXSTATUS_FPACTIVE_BIT  0x01000000
+
+/*
+ * Exception state (see TXDEFR_FPE_FE_*) and enabling for interrupt level
+ * processing (see TXDEFR_FPE_ICTRL_*) are controlled by the same bit mask
+ * locations within each field.
+ */
+#define METAG_FPU_FE_INEXACT   0x01
+#define METAG_FPU_FE_UNDERFLOW 0x02
+#define METAG_FPU_FE_OVERFLOW  0x04
+#define METAG_FPU_FE_DIVBYZERO 0x08
+#define METAG_FPU_FE_INVALID   0x10
+#define METAG_FPU_FE_DENORMAL  0x20
+#define METAG_FPU_FE_ALL_EXCEPT (METAG_FPU_FE_INEXACT   | \
+                                METAG_FPU_FE_UNDERFLOW | \
+                                METAG_FPU_FE_OVERFLOW  | \
+                                METAG_FPU_FE_DIVBYZERO | \
+                                METAG_FPU_FE_INVALID   | \
+                                METAG_FPU_FE_DENORMAL)
+
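+/*
+ * Illustrative usage sketch: because the state and interrupt-enable fields
+ * of TXDEFR share the same bit layout, one METAG_FPU_FE_* mask can be
+ * shifted into either field, e.g.
+ *
+ *     defr = (METAG_FPU_FE_DIVBYZERO << TXDEFR_FPE_FE_S) |
+ *            (METAG_FPU_FE_DIVBYZERO << TXDEFR_FPE_ICTRL_S);
+ *
+ * selects the divide-by-zero bit in both the exception state and the
+ * interrupt routing field.
+ */
+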
+/*****************************************************************************
+ *             THREAD CONTROL, ERROR, OR INTERRUPT STATE EXTENSIONS
+ ****************************************************************************/
+/*
+ * The following values are only relevant to code that externally controls
+ * threads, handles errors/interrupts, and/or sets up interrupt/error handlers
+ * for subsequent use.
+ */
+
+/*
+ * TXENABLE register fields - only ENABLE_BIT is potentially read/write
+ */
+#define TXENABLE_MAJOR_REV_BITS    0xFF000000
+#define TXENABLE_MAJOR_REV_S       24
+#define TXENABLE_MINOR_REV_BITS    0x00FF0000
+#define TXENABLE_MINOR_REV_S       16
+#define TXENABLE_CLASS_BITS        0x0000F000
+#define TXENABLE_CLASS_S           12
+#define TXENABLE_CLASS_DSP             0x0 /* -> DSP Thread */
+#define TXENABLE_CLASS_LDSP            0x8 /* -> DSP LITE Thread */
+#define TXENABLE_CLASS_GP              0xC /* -> General Purpose Thread */
+#define     TXENABLE_CLASSALT_LFPU       0x2 /*  Set to indicate LITE FPU */
+#define     TXENABLE_CLASSALT_FPUR8      0x1 /*  Set to indicate 8xFPU regs */
+#define TXENABLE_MTXARCH_BIT       0x00000800
+#define TXENABLE_STEP_REV_BITS     0x000000F0
+#define TXENABLE_STEP_REV_S        4
+#define TXENABLE_STOPPED_BIT       0x00000004   /* TXOFF due to ENABLE->0 */
+#define TXENABLE_OFF_BIT           0x00000002   /* Thread is in off state */
+#define TXENABLE_ENABLE_BIT        0x00000001   /* Set if running */
+
+/*
+ * TXSTATUS register - used by external/internal interrupt/error handler
+ */
+#define TXSTATUS_CB1MARKER_BIT     0x00800000   /* -> int level mem state */
+#define TXSTATUS_CBMARKER_BIT      0x00400000   /* -> mem i/f state dumped */
+#define TXSTATUS_MEM_FAULT_BITS    0x00300000
+#define TXSTATUS_MEM_FAULT_S       20
+#define     TXSTATUS_MEMFAULT_NONE  0x0 /* -> No memory fault       */
+#define     TXSTATUS_MEMFAULT_GEN   0x1 /* -> General fault         */
+#define     TXSTATUS_MEMFAULT_PF    0x2 /* -> Page fault            */
+#define     TXSTATUS_MEMFAULT_RO    0x3 /* -> Read only fault       */
+#define TXSTATUS_MAJOR_HALT_BITS   0x000C0000
+#define TXSTATUS_MAJOR_HALT_S      18
+#define     TXSTATUS_MAJHALT_TRAP 0x0   /* -> SWITCH inst used      */
+#define     TXSTATUS_MAJHALT_INST 0x1   /* -> Unknown inst or fetch */
+#define     TXSTATUS_MAJHALT_PRIV 0x2   /* -> Internal privilege    */
+#define     TXSTATUS_MAJHALT_MEM  0x3   /* -> Memory i/f fault      */
+#define TXSTATUS_L_STEP_BITS       0x00000800   /* -> Progress of L oper    */
+#define TXSTATUS_LSM_STEP_BITS     0x00000700   /* -> Progress of L/S mult  */
+#define TXSTATUS_LSM_STEP_S        8
+#define TXSTATUS_FLAG_BITS         0x0000001F   /* -> All the flags         */
+#define TXSTATUS_SCC_BIT           0x00000010   /* -> Split-16 flags ...    */
+#define TXSTATUS_SCF_LZ_BIT        0x00000008   /* -> Split-16 Low  Z flag  */
+#define TXSTATUS_SCF_HZ_BIT        0x00000004   /* -> Split-16 High Z flag  */
+#define TXSTATUS_SCF_HC_BIT        0x00000002   /* -> Split-16 High C flag  */
+#define TXSTATUS_SCF_LC_BIT        0x00000001   /* -> Split-16 Low  C flag  */
+#define TXSTATUS_CF_Z_BIT          0x00000008   /* -> Condition Z flag      */
+#define TXSTATUS_CF_N_BIT          0x00000004   /* -> Condition N flag      */
+#define TXSTATUS_CF_O_BIT          0x00000002   /* -> Condition O flag      */
+#define TXSTATUS_CF_C_BIT          0x00000001   /* -> Condition C flag      */
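+
+/*
+ * Illustrative usage sketch: a handler inspecting a saved TXSTATUS value
+ * can recover the memory fault and halt reason fields as
+ *
+ *     fault = (txstatus & TXSTATUS_MEM_FAULT_BITS) >> TXSTATUS_MEM_FAULT_S;
+ *     why   = (txstatus & TXSTATUS_MAJOR_HALT_BITS) >> TXSTATUS_MAJOR_HALT_S;
+ *
+ * comparing fault against TXSTATUS_MEMFAULT_* and why against
+ * TXSTATUS_MAJHALT_*.
+ */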
+
+/*
+ * TXCATCH0-3 register contents may store information on a memory operation
+ * that has failed if the bit TXSTATUS_CBMARKER_BIT is set.
+ */
+#define TXCATCH0_REGNUM 16
+#define TXCATCH1_REGNUM 17
+#define     TXCATCH1_ADDR_BITS   0xFFFFFFFF   /* TXCATCH1 is Addr 0-31 */
+#define     TXCATCH1_ADDR_S      0
+#define TXCATCH2_REGNUM 18
+#define     TXCATCH2_DATA0_BITS  0xFFFFFFFF   /* TXCATCH2 is Data 0-31 */
+#define     TXCATCH2_DATA0_S     0
+#define TXCATCH3_REGNUM 19
+#define     TXCATCH3_DATA1_BITS  0xFFFFFFFF   /* TXCATCH3 is Data 32-63 */
+#define     TXCATCH3_DATA1_S     0
+
+/*
+ * Detailed catch state information
+ * --------------------------------
+ */
+
+/* Contents of TXCATCH0 register */
+#define     TXCATCH0_LDRXX_BITS  0xF8000000  /* Load destination reg 0-31 */
+#define     TXCATCH0_LDRXX_S     27
+#define     TXCATCH0_LDDST_BITS  0x07FF0000  /* Load destination bits */
+#define     TXCATCH0_LDDST_S     16
+#define         TXCATCH0_LDDST_D1DSP 0x400   /* One bit set if it's a LOAD */
+#define         TXCATCH0_LDDST_D0DSP 0x200
+#define         TXCATCH0_LDDST_TMPLT 0x100
+#define         TXCATCH0_LDDST_TR    0x080
+#ifdef METAC_2_1
+#define         TXCATCH0_LDDST_FPU   0x040
+#endif
+#define         TXCATCH0_LDDST_PC    0x020
+#define         TXCATCH0_LDDST_A1    0x010
+#define         TXCATCH0_LDDST_A0    0x008
+#define         TXCATCH0_LDDST_D1    0x004
+#define         TXCATCH0_LDDST_D0    0x002
+#define         TXCATCH0_LDDST_CT    0x001
+#ifdef METAC_2_1
+#define     TXCATCH0_WATCHSTOP_BIT 0x00004000  /* Set if Data Watch set fault */
+#endif
+#define     TXCATCH0_WATCHS_BIT  0x00004000  /* Set if Data Watch set fault */
+#define     TXCATCH0_WATCH1_BIT  0x00002000  /* Set if Data Watch 1 matches */
+#define     TXCATCH0_WATCH0_BIT  0x00001000  /* Set if Data Watch 0 matches */
+#define     TXCATCH0_FAULT_BITS  0x00000C00  /* See TXSTATUS_MEMFAULT_*     */
+#define     TXCATCH0_FAULT_S     10
+#define     TXCATCH0_PRIV_BIT    0x00000200  /* Privilege of transaction    */
+#define     TXCATCH0_READ_BIT    0x00000100  /* Set for Read or Load cases  */
+
+#ifdef METAC_2_1
+/* LNKGET Marker bit in TXCATCH0 */
+#define   TXCATCH0_LNKGET_MARKER_BIT 0x00000008
+#define       TXCATCH0_PREPROC_BIT  0x00000004
+#endif
+
+/* Loads are indicated by one of the LDDST bits being set */
+#define     TXCATCH0_LDM16_BIT   0x00000004  /* Load M16 flag */
+#define     TXCATCH0_LDL2L1_BITS 0x00000003  /* Load data size L2,L1 */
+#define     TXCATCH0_LDL2L1_S    0
+
+/* Reads are indicated by the READ bit being set without LDDST bits */
+#define     TXCATCH0_RAXX_BITS   0x0000001F  /* RAXX issue port for read */
+#define     TXCATCH0_RAXX_S      0
+
+/* Write operations are all that remain if READ bit is not set */
+#define     TXCATCH0_WMASK_BITS  0x000000FF  /* Write byte lane mask */
+#define     TXCATCH0_WMASK_S     0
+
+#ifdef METAC_2_1
+
+/* When a FPU exception is signalled then FPUSPEC == FPUSPEC_TAG */
+#define     TXCATCH0_FPURDREG_BITS    0xF8000000
+#define     TXCATCH0_FPURDREG_S       27
+#define     TXCATCH0_FPUR1REG_BITS    0x07C00000
+#define     TXCATCH0_FPUR1REG_S       22
+#define     TXCATCH0_FPUSPEC_BITS     0x000F0000
+#define     TXCATCH0_FPUSPEC_S        16
+#define         TXCATCH0_FPUSPEC_TAG      0xF
+#define     TXCATCH0_FPUINSTA_BIT     0x00001000
+#define     TXCATCH0_FPUINSTQ_BIT     0x00000800
+#define     TXCATCH0_FPUINSTZ_BIT     0x00000400
+#define     TXCATCH0_FPUINSTN_BIT     0x00000200
+#define     TXCATCH0_FPUINSTO3O_BIT   0x00000100
+#define     TXCATCH0_FPUWIDTH_BITS    0x000000C0
+#define     TXCATCH0_FPUWIDTH_S       6
+#define         TXCATCH0_FPUWIDTH_FLOAT   0
+#define         TXCATCH0_FPUWIDTH_DOUBLE  1
+#define         TXCATCH0_FPUWIDTH_PAIRED  2
+#define     TXCATCH0_FPUOPENC_BITS    0x0000003F
+#define     TXCATCH0_FPUOPENC_S       0
+#define         TXCATCH0_FPUOPENC_ADD     0  /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_SUB     1  /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_MUL     2  /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_ATOI    3  /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_ATOX    4  /* rop3=Rs, uses #Imm */
+#define         TXCATCH0_FPUOPENC_ITOA    5  /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_XTOA    6  /* rop3=Rs, uses #Imm */
+#define         TXCATCH0_FPUOPENC_ATOH    7  /* rop2=Rs */
+#define         TXCATCH0_FPUOPENC_HTOA    8  /* rop2=Rs */
+#define         TXCATCH0_FPUOPENC_DTOF    9  /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_FTOD    10 /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_DTOL    11 /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_LTOD    12 /* rop3=Rs */
+#define         TXCATCH0_FPUOPENC_DTOXL   13 /* rop3=Rs, uses #imm */
+#define         TXCATCH0_FPUOPENC_XLTOD   14 /* rop3=Rs, uses #imm */
+#define         TXCATCH0_FPUOPENC_CMP     15 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MIN     16 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MAX     17 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_ADDRE   18 /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_SUBRE   19 /* rop1=Rs1, rop3=Rs2 */
+#define         TXCATCH0_FPUOPENC_MULRE   20 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MXA     21 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_MXAS    22 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_MAR     23 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MARS    24 /* rop1=Rs1, rop2=Rs2 */
+#define         TXCATCH0_FPUOPENC_MUZ     25 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_MUZS    26 /* rop1=Rs1, rop2=Rs2, rop3=Rs3*/
+#define         TXCATCH0_FPUOPENC_RCP     27 /* rop2=Rs */
+#define         TXCATCH0_FPUOPENC_RSQ     28 /* rop2=Rs */
+
+/* For floating point exceptions TXCATCH1 is used to carry extra data */
+#define     TXCATCH1_FPUR2REG_BITS    0xF8000000
+#define     TXCATCH1_FPUR2REG_S       27
+#define     TXCATCH1_FPUR3REG_BITS    0x07C00000  /* Undefined if O3O set */
+#define     TXCATCH1_FPUR3REG_S       22
+#define     TXCATCH1_FPUIMM16_BITS    0x0000FFFF
+#define     TXCATCH1_FPUIMM16_S       0
+
+#endif /* METAC_2_1 */
+
+/*
+ * TXDIVTIME register used to hold the partial base address of memory i/f
+ * state dump area. Now deprecated.
+ */
+#define     TXDIVTIME_CBBASE_MASK    0x03FFFE00
+#define     TXDIVTIME_CBBASE_LINBASE 0x80000000
+#define     TXDIVTIME_CBBASE_LINBOFF 0x00000000 /* BGnd state */
+#define     TXDIVTIME_CBBASE_LINIOFF 0x00000100 /* Int  state */
+
+/*
+ * TXDIVTIME register used to indicate if the read pipeline was dirty when a
+ * thread was interrupted, halted, or generated an exception. It is invalid
+ * to attempt to issue a further pipeline read address while the read
+ * pipeline is in the dirty state.
+ */
+#define     TXDIVTIME_RPDIRTY_BIT   0x80000000
+
+/*
+ * Further bits in the TXDIVTIME register allow interrupt handling code to
+ * short-cut the discovery of the most significant bit last read from TXSTATI.
+ *
+ * This is the bit number of the trigger line that a low level interrupt
+ * handler should acknowledge and then perhaps the index of a corresponding
+ * handler function.
+ */
+#define     TXDIVTIME_IRQENC_BITS   0x0F000000
+#define     TXDIVTIME_IRQENC_S      24
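+
+/*
+ * Illustrative usage sketch: the trigger number is recovered from a saved
+ * TXDIVTIME value as
+ *
+ *     trig = (txdivtime & TXDIVTIME_IRQENC_BITS) >> TXDIVTIME_IRQENC_S;
+ *
+ * with (1 << trig) then used to acknowledge the trigger and, if wanted, to
+ * index a table of handler functions.
+ */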
+
+/*
+ * If TXDIVTIME_RPVALID_BIT is set the read pipeline contained significant
+ * information when the thread was interrupted, halted, or took an exception.
+ * Each slot containing data is indicated by a one in the corresponding
+ * TXDIVTIME_RPMASK_BITS bit (the least significant bit relates to the first
+ * location in the read pipeline - the one most likely to hold a 1). Empty
+ * slots contain zeroes, with no interlock applied on reads if RPDIRTY is
+ * currently set; RPMASK itself is read-only state.
+ */
+#define     TXDIVTIME_RPMASK_BITS 0x003F0000   /* -> Full (1) Empty (0) */
+#define     TXDIVTIME_RPMASK_S    16
+
+/*
+ * TXPRIVEXT register can be used to single step thread execution and
+ * enforce synchronous memory i/f address checking for debugging purposes.
+ */
+#define     TXPRIVEXT_TXSTEP_BIT    0x00000004
+#define     TXPRIVEXT_MEMCHECK_BIT  0x00000002
+
+/*
+ * TXINTERNx registers hold internal state information for H/W debugging only
+ */
+#define TXINTERN0_REGNUM 23
+#define     TXINTERN0_LOCK2_BITS  0xF0000000
+#define     TXINTERN0_LOCK2_S     28
+#define     TXINTERN0_LOCK1_BITS  0x0F000000
+#define     TXINTERN0_LOCK1_S     24
+#define     TXINTERN0_TIFDF_BITS  0x0000F000
+#define     TXINTERN0_TIFDF_S     12
+#define     TXINTERN0_TIFIB_BITS  0x00000F00
+#define     TXINTERN0_TIFIB_S     8
+#define     TXINTERN0_TIFAF_BITS  0x000000F0
+#define     TXINTERN0_TIFAF_S     4
+#define     TXINTERN0_MSTATE_BITS 0x0000000F
+#define     TXINTERN0_MSTATE_S    0
+
+/*
+ * TXSTAT, TXMASK, TXPOLL, TXSTATI, TXMASKI, TXPOLLI registers from trigger
+ * bank all have similar contents (upper kick count bits not in MASK regs)
+ */
+#define TXSTAT_REGNUM  0
+#define     TXSTAT_TIMER_BIT    0x00000001
+#define     TXSTAT_TIMER_S      0
+#define     TXSTAT_KICK_BIT     0x00000002
+#define     TXSTAT_KICK_S       1
+#define     TXSTAT_DEFER_BIT    0x00000008
+#define     TXSTAT_DEFER_S      3
+#define     TXSTAT_EXTTRIG_BITS 0x0000FFF0
+#define     TXSTAT_EXTTRIG_S    4
+#define     TXSTAT_FPE_BITS     0x003F0000
+#define     TXSTAT_FPE_S        16
+#define     TXSTAT_FPE_DENORMAL_BIT    0x00200000
+#define     TXSTAT_FPE_DENORMAL_S      21
+#define     TXSTAT_FPE_INVALID_BIT     0x00100000
+#define     TXSTAT_FPE_INVALID_S       20
+#define     TXSTAT_FPE_DIVBYZERO_BIT   0x00080000
+#define     TXSTAT_FPE_DIVBYZERO_S     19
+#define     TXSTAT_FPE_OVERFLOW_BIT    0x00040000
+#define     TXSTAT_FPE_OVERFLOW_S      18
+#define     TXSTAT_FPE_UNDERFLOW_BIT   0x00020000
+#define     TXSTAT_FPE_UNDERFLOW_S     17
+#define     TXSTAT_FPE_INEXACT_BIT     0x00010000
+#define     TXSTAT_FPE_INEXACT_S       16
+#define     TXSTAT_BUSERR_BIT          0x00800000   /* Set if bus error/ack state */
+#define     TXSTAT_BUSERR_S            23
+#define         TXSTAT_BUSSTATE_BITS     0xFF000000 /* Read only */
+#define         TXSTAT_BUSSTATE_S        24
+#define     TXSTAT_KICKCNT_BITS 0xFFFF0000
+#define     TXSTAT_KICKCNT_S    16
+#define TXMASK_REGNUM  1
+#define TXSTATI_REGNUM 2
+#define     TXSTATI_BGNDHALT_BIT    0x00000004
+#define TXMASKI_REGNUM 3
+#define TXPOLL_REGNUM  4
+#define TXPOLLI_REGNUM 6
+
+/*
+ * TXDRCTRL register can be used to partition the DSP RAM space available to
+ * this thread at startup. This is achieved by offsetting the region allocated
+ * to each thread.
+ */
+#define     TXDRCTRL_D1PARTOR_BITS  0x00000F00  /* OR's into top 4 bits */
+#define     TXDRCTRL_D1PARTOR_S     8
+#define     TXDRCTRL_D0PARTOR_BITS  0x0000000F  /* OR's into top 4 bits */
+#define     TXDRCTRL_D0PARTOR_S     0
+/* Given extracted Pow and Or fields this is the thread's base within DSP RAM */
+#define     TXDRCTRL_DXBASE(Pow, Or)  ((Or)<<((Pow)-4))
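+
+/*
+ * Illustrative usage sketch (the partition size power "pow" comes from a
+ * size field that is not defined in this header, so it is assumed to be
+ * available already): the D0 RAM base for this thread follows as
+ *
+ *     or0  = (txdrctrl & TXDRCTRL_D0PARTOR_BITS) >> TXDRCTRL_D0PARTOR_S;
+ *     base = TXDRCTRL_DXBASE(pow, or0);
+ */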
+
+/*****************************************************************************
+ *                      RUN TIME TRACE CONTROL REGISTERS
+ ****************************************************************************/
+/*
+ * The following values are only relevant to code that implements run-time
+ *  trace features within the META Core
+ */
+#define TTEXEC      TT.0
+#define TTCTRL      TT.1
+#define TTMARK      TT.2
+#define TTREC       TT.3
+#define GTEXEC      TT.4
+
+#define TTEXEC_REGNUM               0
+#define     TTEXEC_EXTTRIGAND_BITS      0x7F000000
+#define     TTEXEC_EXTTRIGAND_S         24
+#define     TTEXEC_EXTTRIGEN_BIT        0x00008000
+#define     TTEXEC_EXTTRIGMATCH_BITS    0x00007F00
+#define     TTEXEC_EXTTRIGMATCH_S       8
+#define     TTEXEC_TCMODE_BITS          0x00000003
+#define     TTEXEC_TCMODE_S             0
+
+#define TTCTRL_REGNUM               1
+#define     TTCTRL_TRACETT_BITS         0x00008000
+#define     TTCTRL_TRACETT_S            15
+#define     TTCTRL_TRACEALL_BITS        0x00002000
+#define     TTCTRL_TRACEALL_S           13
+#ifdef METAC_2_1
+#define     TTCTRL_TRACEALLTAG_BITS     0x00000400
+#define     TTCTRL_TRACEALLTAG_S        10
+#endif /* METAC_2_1 */
+#define     TTCTRL_TRACETAG_BITS        0x00000200
+#define     TTCTRL_TRACETAG_S           9
+#define     TTCTRL_TRACETTPC_BITS       0x00000080
+#define     TTCTRL_TRACETTPC_S          7
+#define     TTCTRL_TRACEMPC_BITS        0x00000020
+#define     TTCTRL_TRACEMPC_S           5
+#define     TTCTRL_TRACEEN_BITS         0x00000008
+#define     TTCTRL_TRACEEN_S            3
+#define     TTCTRL_TRACEEN1_BITS        0x00000004
+#define     TTCTRL_TRACEEN1_S           2
+#define     TTCTRL_TRACEPC_BITS         0x00000002
+#define     TTCTRL_TRACEPC_S            1
+
+#ifdef METAC_2_1
+#define TTMARK_REGNUM   2
+#define TTMARK_BITS                 0xFFFFFFFF
+#define TTMARK_S                    0x0
+
+#define TTREC_REGNUM    3
+#define TTREC_BITS                  0xFFFFFFFFFFFFFFFF
+#define TTREC_S                     0x0
+#endif /* METAC_2_1 */
+
+#define GTEXEC_REGNUM               4
+#define     GTEXEC_DCRUN_BITS           0x80000000
+#define     GTEXEC_DCRUN_S              31
+#define     GTEXEC_ICMODE_BITS          0x0C000000
+#define     GTEXEC_ICMODE_S             26
+#define     GTEXEC_TCMODE_BITS          0x03000000
+#define     GTEXEC_TCMODE_S             24
+#define     GTEXEC_PERF1CMODE_BITS      0x00040000
+#define     GTEXEC_PERF1CMODE_S         18
+#define     GTEXEC_PERF0CMODE_BITS      0x00010000
+#define     GTEXEC_PERF0CMODE_S         16
+#define     GTEXEC_REFMSEL_BITS         0x0000F000
+#define     GTEXEC_REFMSEL_S            12
+#define     GTEXEC_METRICTH_BITS        0x000003FF
+#define     GTEXEC_METRICTH_S           0
+
+#ifdef METAC_2_1
+/*
+ * Clock Control registers
+ * -----------------------
+ */
+#define TXCLKCTRL_REGNUM        22
+
+/*
+ * The default setting is with clocks always on (DEFON); turning all clocks
+ * off can only be done from external devices (OFF), and enabling automatic
+ * clock gating (AUTO) allows clocks to stop as units fall idle.
+ */
+#define TXCLKCTRL_ALL_OFF       0x02222222
+#define TXCLKCTRL_ALL_DEFON     0x01111111
+#define TXCLKCTRL_ALL_AUTO      0x02222222
+
+/*
+ * Individual fields control caches, floating point and main data/addr units
+ */
+#define TXCLKCTRL_CLOCKIC_BITS  0x03000000
+#define TXCLKCTRL_CLOCKIC_S     24
+#define TXCLKCTRL_CLOCKDC_BITS  0x00300000
+#define TXCLKCTRL_CLOCKDC_S     20
+#define TXCLKCTRL_CLOCKFP_BITS  0x00030000
+#define TXCLKCTRL_CLOCKFP_S     16
+#define TXCLKCTRL_CLOCKD1_BITS  0x00003000
+#define TXCLKCTRL_CLOCKD1_S     12
+#define TXCLKCTRL_CLOCKD0_BITS  0x00000300
+#define TXCLKCTRL_CLOCKD0_S     8
+#define TXCLKCTRL_CLOCKA1_BITS  0x00000030
+#define TXCLKCTRL_CLOCKA1_S     4
+#define TXCLKCTRL_CLOCKA0_BITS  0x00000003
+#define TXCLKCTRL_CLOCKA0_S     0
+
+/*
+ * Individual settings for each field are common
+ */
+#define TXCLKCTRL_CLOCKxx_OFF   0
+#define TXCLKCTRL_CLOCKxx_DEFON 1
+#define TXCLKCTRL_CLOCKxx_AUTO  2
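+
+/*
+ * Illustrative usage sketch: enabling automatic gating for the data cache
+ * alone, while leaving the other units at their default, can be written as
+ *
+ *     clk  = TXCLKCTRL_ALL_DEFON;
+ *     clk &= ~TXCLKCTRL_CLOCKDC_BITS;
+ *     clk |= TXCLKCTRL_CLOCKxx_AUTO << TXCLKCTRL_CLOCKDC_S;
+ */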
+
+#endif /* METAC_2_1 */
+
+#ifdef METAC_2_1
+/*
+ * Fast interrupt new bits
+ * ------------------------------------
+ */
+#define TXSTATUS_IPTOGGLE_BIT           0x80000000 /* Prev PToggle of TXPRIVEXT */
+#define TXSTATUS_ISTATE_BIT             0x40000000 /* IState bit */
+#define TXSTATUS_IWAIT_BIT              0x20000000 /* Wait indefinitely in decision step */
+#define TXSTATUS_IEXCEPT_BIT            0x10000000 /* Indicates an exception occurred */
+#define TXSTATUS_IRPCOUNT_BITS          0x0E000000 /* Number of 'dirty' data entries */
+#define TXSTATUS_IRPCOUNT_S             25
+#define TXSTATUS_IRQSTAT_BITS           0x0000F000 /* IRQEnc bits, trigger or interrupts */
+#define TXSTATUS_IRQSTAT_S              12
+#define TXSTATUS_LNKSETOK_BIT           0x00000020 /* LNKSetOK bit, successful LNKSET */
+
+/* New fields in TXDIVTIME for the fast interrupt system */
+#define TXDIVTIME_IACTIVE_BIT           0x00008000 /* Enable new interrupt system */
+#define TXDIVTIME_INONEST_BIT           0x00004000 /* Gate nested interrupt */
+#define TXDIVTIME_IREGIDXGATE_BIT       0x00002000 /* Gate of the IRegIdx field */
+#define TXDIVTIME_IREGIDX_BITS          0x00001E00 /* Index of A0.0/1 replaces */
+#define TXDIVTIME_IREGIDX_S             9
+#define TXDIVTIME_NOST_BIT              0x00000100 /* disable superthreading bit */
+#endif
+
+#endif /* _ASM_METAG_REGS_H_ */
diff --git a/arch/metag/include/asm/mman.h b/arch/metag/include/asm/mman.h
new file mode 100644 (file)
index 0000000..17999db
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef __METAG_MMAN_H__
+#define __METAG_MMAN_H__
+
+#include <uapi/asm/mman.h>
+
+#ifndef __ASSEMBLY__
+#define arch_mmap_check metag_mmap_check
+int metag_mmap_check(unsigned long addr, unsigned long len,
+                    unsigned long flags);
+#endif
+#endif /* __METAG_MMAN_H__ */
diff --git a/arch/metag/include/asm/mmu.h b/arch/metag/include/asm/mmu.h
new file mode 100644 (file)
index 0000000..9c32114
--- /dev/null
@@ -0,0 +1,77 @@
+#ifndef __MMU_H
+#define __MMU_H
+
+#ifdef CONFIG_METAG_USER_TCM
+#include <linux/list.h>
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+#include <asm/page.h>
+#endif
+
+typedef struct {
+       /* Software pgd base pointer used for Meta 1.x MMU. */
+       unsigned long pgd_base;
+#ifdef CONFIG_METAG_USER_TCM
+       struct list_head tcm;
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#if HPAGE_SHIFT < HUGEPT_SHIFT
+       /* last partially filled huge page table address */
+       unsigned long part_huge;
+#endif
+#endif
+} mm_context_t;
+
+/* Given a virtual address, return the pte for the top level 4meg entry
+ * that maps that address.
+ * Returns 0 (an empty pte) if that range is not mapped.
+ */
+unsigned long mmu_read_first_level_page(unsigned long vaddr);
+
+/* Given a linear (virtual) address, return the second level 4k pte
+ * that maps that address.  Returns 0 if the address is not mapped.
+ */
+unsigned long mmu_read_second_level_page(unsigned long vaddr);
+
+/* Get the virtual base address of the MMU */
+unsigned long mmu_get_base(void);
+
+/* Initialize the MMU. */
+void mmu_init(unsigned long mem_end);
+
+#ifdef CONFIG_METAG_META21_MMU
+/*
+ * For cpu "cpu" calculate and return the address of the
+ * MMCU_TnLOCAL_TABLE_PHYS0 if running in local-space or
+ * MMCU_TnGLOBAL_TABLE_PHYS0 if running in global-space.
+ */
+static inline unsigned long mmu_phys0_addr(unsigned int cpu)
+{
+       unsigned long phys0;
+
+       phys0 = (MMCU_T0LOCAL_TABLE_PHYS0 +
+               (MMCU_TnX_TABLE_PHYSX_STRIDE * cpu)) +
+               (MMCU_TXG_TABLE_PHYSX_OFFSET * is_global_space(PAGE_OFFSET));
+
+       return phys0;
+}
+
+/*
+ * For cpu "cpu" calculate and return the address of the
+ * MMCU_TnLOCAL_TABLE_PHYS1 if running in local-space or
+ * MMCU_TnGLOBAL_TABLE_PHYS1 if running in global-space.
+ */
+static inline unsigned long mmu_phys1_addr(unsigned int cpu)
+{
+       unsigned long phys1;
+
+       phys1 = (MMCU_T0LOCAL_TABLE_PHYS1 +
+               (MMCU_TnX_TABLE_PHYSX_STRIDE * cpu)) +
+               (MMCU_TXG_TABLE_PHYSX_OFFSET * is_global_space(PAGE_OFFSET));
+
+       return phys1;
+}
+#endif /* CONFIG_METAG_META21_MMU */
+
+#endif
diff --git a/arch/metag/include/asm/mmu_context.h b/arch/metag/include/asm/mmu_context.h
new file mode 100644 (file)
index 0000000..ae2a71b
--- /dev/null
@@ -0,0 +1,113 @@
+#ifndef __METAG_MMU_CONTEXT_H
+#define __METAG_MMU_CONTEXT_H
+
+#include <asm-generic/mm_hooks.h>
+
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+#include <linux/io.h>
+
+static inline void enter_lazy_tlb(struct mm_struct *mm,
+                                 struct task_struct *tsk)
+{
+}
+
+static inline int init_new_context(struct task_struct *tsk,
+                                  struct mm_struct *mm)
+{
+#ifndef CONFIG_METAG_META21_MMU
+       /* We use context to store a pointer to the page holding the
+        * pgd of a process while it is running. While a process is not
+        * running the pgd and context fields should be equal.
+        */
+       mm->context.pgd_base = (unsigned long) mm->pgd;
+#endif
+#ifdef CONFIG_METAG_USER_TCM
+       INIT_LIST_HEAD(&mm->context.tcm);
+#endif
+       return 0;
+}
+
+#ifdef CONFIG_METAG_USER_TCM
+
+#include <linux/slab.h>
+#include <asm/tcm.h>
+
+static inline void destroy_context(struct mm_struct *mm)
+{
+       struct tcm_allocation *pos, *n;
+
+       list_for_each_entry_safe(pos, n,  &mm->context.tcm, list) {
+               tcm_free(pos->tag, pos->addr, pos->size);
+               list_del(&pos->list);
+               kfree(pos);
+       }
+}
+#else
+#define destroy_context(mm)            do { } while (0)
+#endif
+
+#ifdef CONFIG_METAG_META21_MMU
+static inline void load_pgd(pgd_t *pgd, int thread)
+{
+       unsigned long phys0 = mmu_phys0_addr(thread);
+       unsigned long phys1 = mmu_phys1_addr(thread);
+
+       /*
+        *  0x900 2Gb address space
+        *  The permission bits apply to MMU table region which gives a 2MB
+        *  window into physical memory. We especially don't want userland to be
+        *  able to access this.
+        */
+       metag_out32(0x900 | _PAGE_CACHEABLE | _PAGE_PRIV | _PAGE_WRITE |
+                   _PAGE_PRESENT, phys0);
+       /* Set new MMU base address */
+       metag_out32(__pa(pgd) & MMCU_TBLPHYS1_ADDR_BITS, phys1);
+}
+#endif
+
+static inline void switch_mmu(struct mm_struct *prev, struct mm_struct *next)
+{
+#ifdef CONFIG_METAG_META21_MMU
+       load_pgd(next->pgd, hard_processor_id());
+#else
+       unsigned int i;
+
+       /* prev->context == prev->pgd in the case where we are initially
+          switching from the init task to the first process. */
+       if (prev->context.pgd_base != (unsigned long) prev->pgd) {
+               for (i = FIRST_USER_PGD_NR; i < USER_PTRS_PER_PGD; i++)
+                       ((pgd_t *) prev->context.pgd_base)[i] = prev->pgd[i];
+       } else
+               prev->pgd = (pgd_t *)mmu_get_base();
+
+       next->pgd = prev->pgd;
+       prev->pgd = (pgd_t *) prev->context.pgd_base;
+
+       for (i = FIRST_USER_PGD_NR; i < USER_PTRS_PER_PGD; i++)
+               next->pgd[i] = ((pgd_t *) next->context.pgd_base)[i];
+
+       flush_cache_all();
+#endif
+       flush_tlb_all();
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+                            struct task_struct *tsk)
+{
+       if (prev != next)
+               switch_mmu(prev, next);
+}
+
+static inline void activate_mm(struct mm_struct *prev_mm,
+                              struct mm_struct *next_mm)
+{
+       switch_mmu(prev_mm, next_mm);
+}
+
+#define deactivate_mm(tsk, mm)   do { } while (0)
+
+#endif
diff --git a/arch/metag/include/asm/mmzone.h b/arch/metag/include/asm/mmzone.h
new file mode 100644 (file)
index 0000000..9c88a9c
--- /dev/null
@@ -0,0 +1,42 @@
+#ifndef __ASM_METAG_MMZONE_H
+#define __ASM_METAG_MMZONE_H
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+#include <linux/numa.h>
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid)         (node_data[nid])
+
+static inline int pfn_to_nid(unsigned long pfn)
+{
+       int nid;
+
+       for (nid = 0; nid < MAX_NUMNODES; nid++)
+               if (pfn >= node_start_pfn(nid) && pfn <= node_end_pfn(nid))
+                       break;
+
+       return nid;
+}
+
+static inline struct pglist_data *pfn_to_pgdat(unsigned long pfn)
+{
+       return NODE_DATA(pfn_to_nid(pfn));
+}
+
+/* arch/metag/mm/numa.c */
+void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end);
+#else
+static inline void
+setup_bootmem_node(int nid, unsigned long start, unsigned long end)
+{
+}
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
+#ifdef CONFIG_NUMA
+/* SoC specific mem init */
+void __init soc_mem_setup(void);
+#else
+static inline void __init soc_mem_setup(void) {}
+#endif
+
+#endif /* __ASM_METAG_MMZONE_H */
diff --git a/arch/metag/include/asm/module.h b/arch/metag/include/asm/module.h
new file mode 100644 (file)
index 0000000..e47e609
--- /dev/null
@@ -0,0 +1,37 @@
+#ifndef _ASM_METAG_MODULE_H
+#define _ASM_METAG_MODULE_H
+
+#include <asm-generic/module.h>
+
+struct metag_plt_entry {
+       /* Indirect jump instruction sequence. */
+       unsigned long tramp[2];
+};
+
+struct mod_arch_specific {
+       /* Indices of PLT sections within module. */
+       unsigned int core_plt_section, init_plt_section;
+};
+
+#if defined CONFIG_METAG_META12
+#define MODULE_PROC_FAMILY "META 1.2 "
+#elif defined CONFIG_METAG_META21
+#define MODULE_PROC_FAMILY "META 2.1 "
+#else
+#define MODULE_PROC_FAMILY ""
+#endif
+
+#ifdef CONFIG_4KSTACKS
+#define MODULE_STACKSIZE "4KSTACKS "
+#else
+#define MODULE_STACKSIZE ""
+#endif
+
+#define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE
+
+#ifdef MODULE
+asm(".section .plt,\"ax\",@progbits; .balign 8; .previous");
+asm(".section .init.plt,\"ax\",@progbits; .balign 8; .previous");
+#endif
+
+#endif /* _ASM_METAG_MODULE_H */
diff --git a/arch/metag/include/asm/page.h b/arch/metag/include/asm/page.h
new file mode 100644 (file)
index 0000000..1e8e281
--- /dev/null
@@ -0,0 +1,128 @@
+#ifndef _METAG_PAGE_H
+#define _METAG_PAGE_H
+
+#include <linux/const.h>
+
+#include <asm/metag_mem.h>
+
+/* PAGE_SHIFT determines the page size */
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define PAGE_SHIFT     12
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define PAGE_SHIFT     13
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define PAGE_SHIFT     14
+#endif
+
+#define PAGE_SIZE      (_AC(1, UL) << PAGE_SHIFT)
+#define PAGE_MASK      (~(PAGE_SIZE-1))
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define HPAGE_SHIFT   13
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define HPAGE_SHIFT   14
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define HPAGE_SHIFT   15
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define HPAGE_SHIFT   16
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define HPAGE_SHIFT   17
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define HPAGE_SHIFT   18
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define HPAGE_SHIFT   19
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define HPAGE_SHIFT   20
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define HPAGE_SHIFT   21
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define HPAGE_SHIFT   22
+#endif
+
+#ifdef CONFIG_HUGETLB_PAGE
+# define HPAGE_SIZE            (1UL << HPAGE_SHIFT)
+# define HPAGE_MASK            (~(HPAGE_SIZE-1))
+# define HUGETLB_PAGE_ORDER    (HPAGE_SHIFT-PAGE_SHIFT)
+/*
+ * We define our own hugetlb_get_unmapped_area so we don't corrupt 2nd level
+ * page tables with normal pages in them.
+ */
+# define HUGEPT_SHIFT          (22)
+# define HUGEPT_ALIGN          (1 << HUGEPT_SHIFT)
+# define HUGEPT_MASK           (HUGEPT_ALIGN - 1)
+# define ALIGN_HUGEPT(x)       ALIGN(x, HUGEPT_ALIGN)
+# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* On the Meta, we would like to know if the address (heap) we have is
+ * in local or global space.
+ */
+#define is_global_space(addr)  ((addr) > 0x7fffffff)
+#define is_local_space(addr)   (!is_global_space(addr))
+
+extern void clear_page(void *to);
+extern void copy_page(void *to, void *from);
+
+#define clear_user_page(page, vaddr, pg)        clear_page(page)
+#define copy_user_page(to, from, vaddr, pg)     copy_page(to, from)
+
+/*
+ * These are used to make use of C type-checking..
+ */
+typedef struct { unsigned long pte; } pte_t;
+typedef struct { unsigned long pgd; } pgd_t;
+typedef struct { unsigned long pgprot; } pgprot_t;
+typedef struct page *pgtable_t;
+
+#define pte_val(x)     ((x).pte)
+#define pgd_val(x)     ((x).pgd)
+#define pgprot_val(x)  ((x).pgprot)
+
+#define __pte(x)       ((pte_t) { (x) })
+#define __pgd(x)       ((pgd_t) { (x) })
+#define __pgprot(x)    ((pgprot_t) { (x) })
+
+/* The kernel must now ALWAYS live at either 0xC0000000 or 0x40000000 - that
+ * being either global or local space.
+ */
+#define PAGE_OFFSET            (CONFIG_PAGE_OFFSET)
+
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define META_MEMORY_BASE  LINGLOBAL_BASE
+#define META_MEMORY_LIMIT LINGLOBAL_LIMIT
+#else
+#define META_MEMORY_BASE  LINLOCAL_BASE
+#define META_MEMORY_LIMIT LINLOCAL_LIMIT
+#endif
+
+/* Offset between physical and virtual mapping of kernel memory. */
+extern unsigned int meta_memoffset;
+
+#define __pa(x) ((unsigned long)(((unsigned long)(x)) - meta_memoffset))
+#define __va(x) ((void *)((unsigned long)(((unsigned long)(x)) + meta_memoffset)))
+
+extern unsigned long pfn_base;
+#define ARCH_PFN_OFFSET         (pfn_base)
+#define virt_to_page(kaddr)     pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_virt(page)      __va(page_to_pfn(page) << PAGE_SHIFT)
+#define virt_addr_valid(kaddr)  pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#define page_to_phys(page)      (page_to_pfn(page) << PAGE_SHIFT)
+#ifdef CONFIG_FLATMEM
+extern unsigned long max_pfn;
+extern unsigned long min_low_pfn;
+#define pfn_valid(pfn)         ((pfn) >= min_low_pfn && (pfn) < max_pfn)
+#endif
+
+#define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
+
+#define VM_DATA_DEFAULT_FLAGS   (VM_READ | VM_WRITE | VM_EXEC | \
+                                VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#include <asm-generic/memory_model.h>
+#include <asm-generic/getorder.h>
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _METAG_PAGE_H */
diff --git a/arch/metag/include/asm/perf_event.h b/arch/metag/include/asm/perf_event.h
new file mode 100644 (file)
index 0000000..105bbff
--- /dev/null
@@ -0,0 +1,4 @@
+#ifndef __ASM_METAG_PERF_EVENT_H
+#define __ASM_METAG_PERF_EVENT_H
+
+#endif /* __ASM_METAG_PERF_EVENT_H */
diff --git a/arch/metag/include/asm/pgalloc.h b/arch/metag/include/asm/pgalloc.h
new file mode 100644 (file)
index 0000000..275d928
--- /dev/null
@@ -0,0 +1,79 @@
+#ifndef _METAG_PGALLOC_H
+#define _METAG_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/mm.h>
+
+#define pmd_populate_kernel(mm, pmd, pte) \
+       set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)))
+
+#define pmd_populate(mm, pmd, pte) \
+       set_pmd(pmd, __pmd(_PAGE_TABLE | page_to_phys(pte)))
+
+#define pmd_pgtable(pmd) pmd_page(pmd)
+
+/*
+ * Allocate and free page tables.
+ */
+#ifdef CONFIG_METAG_META21_MMU
+static inline void pgd_ctor(pgd_t *pgd)
+{
+       memcpy(pgd + USER_PTRS_PER_PGD,
+              swapper_pg_dir + USER_PTRS_PER_PGD,
+              (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+}
+#else
+#define pgd_ctor(x)    do { } while (0)
+#endif
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+       pgd_t *pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
+       if (pgd)
+               pgd_ctor(pgd);
+       return pgd;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+       free_page((unsigned long)pgd);
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+                                         unsigned long address)
+{
+       pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT |
+                                             __GFP_ZERO);
+       return pte;
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+                                     unsigned long address)
+{
+       struct page *pte;
+       pte = alloc_pages(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO, 0);
+       if (pte)
+               pgtable_page_ctor(pte);
+       return pte;
+}
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+       free_page((unsigned long)pte);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
+{
+       pgtable_page_dtor(pte);
+       __free_page(pte);
+}
+
+#define __pte_free_tlb(tlb, pte, addr)                         \
+       do {                                                    \
+               pgtable_page_dtor(pte);                         \
+               tlb_remove_page((tlb), (pte));                  \
+       } while (0)
+
+#define check_pgt_cache()      do { } while (0)
+
+#endif
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
new file mode 100644 (file)
index 0000000..1cd13d5
--- /dev/null
@@ -0,0 +1,370 @@
+/*
+ * Macros and functions to manipulate Meta page tables.
+ */
+
+#ifndef _METAG_PGTABLE_H
+#define _METAG_PGTABLE_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+/* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define CONSISTENT_START       0xF7000000
+#define CONSISTENT_END         0xF73FFFFF
+#define VMALLOC_START          0xF8000000
+#define VMALLOC_END            0xFFFEFFFF
+#else
+#define CONSISTENT_START       0x77000000
+#define CONSISTENT_END         0x773FFFFF
+#define VMALLOC_START          0x78000000
+#define VMALLOC_END            0x7FFFFFFF
+#endif
+
+/*
+ * Definitions for MMU descriptors
+ *
+ * These are the hardware bits in the MMCU pte entries.
+ * Derived from the Meta toolkit headers.
+ */
+#define _PAGE_PRESENT          MMCU_ENTRY_VAL_BIT
+#define _PAGE_WRITE            MMCU_ENTRY_WR_BIT
+#define _PAGE_PRIV             MMCU_ENTRY_PRIV_BIT
+/* Write combine bit - this can cause writes to occur out of order */
+#define _PAGE_WR_COMBINE       MMCU_ENTRY_WRC_BIT
+/* Sys coherent bit - this bit is never used by Linux */
+#define _PAGE_SYS_COHERENT     MMCU_ENTRY_SYS_BIT
+#define _PAGE_ALWAYS_ZERO_1    0x020
+#define _PAGE_CACHE_CTRL0      0x040
+#define _PAGE_CACHE_CTRL1      0x080
+#define _PAGE_ALWAYS_ZERO_2    0x100
+#define _PAGE_ALWAYS_ZERO_3    0x200
+#define _PAGE_ALWAYS_ZERO_4    0x400
+#define _PAGE_ALWAYS_ZERO_5    0x800
+
+/* These are software bits that we stuff into the gaps in the hardware
+ * pte entries that are not used.  Note, these DO get stored in the actual
+ * hardware, but the hardware just does not use them.
+ */
+#define _PAGE_ACCESSED         _PAGE_ALWAYS_ZERO_1
+#define _PAGE_DIRTY            _PAGE_ALWAYS_ZERO_2
+#define _PAGE_FILE             _PAGE_ALWAYS_ZERO_3
+
+/* Pages owned, and protected by, the kernel. */
+#define _PAGE_KERNEL           _PAGE_PRIV
+
+/* No caching of this page */
+#define _PAGE_CACHE_WIN0       (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
+/* Burst caching - good for data streaming */
+#define _PAGE_CACHE_WIN1       (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
+/* One cache way per thread */
+#define _PAGE_CACHE_WIN2       (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
+/* Full on caching */
+#define _PAGE_CACHE_WIN3       (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
+
+#define _PAGE_CACHEABLE                (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
+
+/* which bits are used for cache control ... */
+#define _PAGE_CACHE_MASK       (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
+                                _PAGE_WR_COMBINE)
+
+/* This is a mask of the bits that pte_modify is allowed to change. */
+#define _PAGE_CHG_MASK         (PAGE_MASK)
+
+#define _PAGE_SZ_SHIFT         1
+#define _PAGE_SZ_4K            (0x0)
+#define _PAGE_SZ_8K            (0x1 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_16K           (0x2 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_32K           (0x3 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_64K           (0x4 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_128K          (0x5 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_256K          (0x6 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_512K          (0x7 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_1M            (0x8 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_2M            (0x9 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_4M            (0xa << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_MASK          (0xf << _PAGE_SZ_SHIFT)
+
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define _PAGE_SZ               (_PAGE_SZ_4K)
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define _PAGE_SZ               (_PAGE_SZ_8K)
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define _PAGE_SZ               (_PAGE_SZ_16K)
+#endif
+#define _PAGE_TABLE            (_PAGE_SZ | _PAGE_PRESENT)
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_8K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_16K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_32K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_64K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_128K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_256K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_512K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define _PAGE_SZHUGE          (_PAGE_SZ_1M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define _PAGE_SZHUGE          (_PAGE_SZ_2M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define _PAGE_SZHUGE          (_PAGE_SZ_4M)
+#endif
+
+/*
+ * The Linux memory management assumes a three-level page table setup. On
+ * Meta, we use that, but "fold" the mid level into the top-level page
+ * table.
+ */
+
+/* PGDIR_SHIFT determines the size of the area a second-level page table can
+ * map. This is always 4MB.
+ */
+
+#define PGDIR_SHIFT    22
+#define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK     (~(PGDIR_SIZE-1))
+
+/*
+ * Entries per page directory level: we use a two-level setup, so
+ * we don't really have any PMD directory physically. First level tables
+ * always map 2Gb (local or global) at a granularity of 4MB, second-level
+ * tables map 4MB with a granularity between 4MB and 4kB (between 1 and
+ * 1024 entries).
+ */
+#define PTRS_PER_PTE   (PGDIR_SIZE/PAGE_SIZE)
+#define HPTRS_PER_PTE  (PGDIR_SIZE/HPAGE_SIZE)
+#define PTRS_PER_PGD   512
+
+#define USER_PTRS_PER_PGD      256
+#define FIRST_USER_ADDRESS     META_MEMORY_BASE
+#define FIRST_USER_PGD_NR      pgd_index(FIRST_USER_ADDRESS)
+
+#define PAGE_NONE      __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+                                _PAGE_CACHEABLE)
+
+#define PAGE_SHARED    __pgprot(_PAGE_PRESENT | _PAGE_WRITE | \
+                                _PAGE_ACCESSED | _PAGE_CACHEABLE)
+#define PAGE_SHARED_C  PAGE_SHARED
+#define PAGE_COPY      __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+                                _PAGE_CACHEABLE)
+#define PAGE_COPY_C    PAGE_COPY
+
+#define PAGE_READONLY  __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+                                _PAGE_CACHEABLE)
+#define PAGE_KERNEL    __pgprot(_PAGE_PRESENT | _PAGE_DIRTY | \
+                                _PAGE_ACCESSED | _PAGE_WRITE | \
+                                _PAGE_CACHEABLE | _PAGE_KERNEL)
+
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY
+#define __P101 PAGE_READONLY
+#define __P110 PAGE_COPY_C
+#define __P111 PAGE_COPY_C
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY
+#define __S101 PAGE_READONLY
+#define __S110 PAGE_SHARED_C
+#define __S111 PAGE_SHARED_C
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+
+/* zero page used for uninitialized stuff */
+extern unsigned long empty_zero_page;
+#define ZERO_PAGE(vaddr)       (virt_to_page(empty_zero_page))
+
+/* Certain architectures need to do special things when pte's
+ * within a page table are directly modified.  Thus, the following
+ * hook is made available.
+ */
+#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
+#define set_pte_at(mm, addr, ptep, pteval) set_pte(ptep, pteval)
+
+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval)
+
+#define pte_pfn(pte)           (pte_val(pte) >> PAGE_SHIFT)
+
+#define pfn_pte(pfn, prot)     __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
+
+#define pte_none(x)            (!pte_val(x))
+#define pte_present(x)         (pte_val(x) & _PAGE_PRESENT)
+#define pte_clear(mm, addr, xp)        do { pte_val(*(xp)) = 0; } while (0)
+
+#define pmd_none(x)            (!pmd_val(x))
+#define pmd_bad(x)             ((pmd_val(x) & ~(PAGE_MASK | _PAGE_SZ_MASK)) \
+                                       != (_PAGE_TABLE & ~_PAGE_SZ_MASK))
+#define pmd_present(x)         (pmd_val(x) & _PAGE_PRESENT)
+#define pmd_clear(xp)          do { pmd_val(*(xp)) = 0; } while (0)
+
+#define pte_page(x)            pfn_to_page(pte_pfn(x))
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+
+static inline int pte_write(pte_t pte)   { return pte_val(pte) & _PAGE_WRITE; }
+static inline int pte_dirty(pte_t pte)   { return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)   { return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_file(pte_t pte)    { return pte_val(pte) & _PAGE_FILE; }
+static inline int pte_special(pte_t pte) { return 0; }
+
+static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= (~_PAGE_WRITE); return pte; }
+static inline pte_t pte_mkclean(pte_t pte)   { pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkold(pte_t pte)     { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte)   { pte_val(pte) |= _PAGE_WRITE; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte)   { pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte)   { pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) { return pte; }
+static inline pte_t pte_mkhuge(pte_t pte)    { return pte; }
+
+/*
+ * Macros to make a page protection write-combining or uncacheable.
+ */
+#define pgprot_writecombine(prot)                                      \
+       __pgprot(pgprot_val(prot) & ~(_PAGE_CACHE_CTRL1 | _PAGE_CACHE_CTRL0))
+
+#define pgprot_noncached(prot)                                         \
+       __pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)
+
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ */
+
+#define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+       pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+       return pte;
+}
+
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+{
+       unsigned long paddr = pmd_val(pmd) & PAGE_MASK;
+       if (!paddr)
+               return 0;
+       return (unsigned long)__va(paddr);
+}
+
+#define pmd_page(pmd)          (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+#define pmd_page_shift(pmd)    (12 + ((pmd_val(pmd) & _PAGE_SZ_MASK) \
+                                       >> _PAGE_SZ_SHIFT))
+#define pmd_num_ptrs(pmd)      (PGDIR_SIZE >> pmd_page_shift(pmd))
+
+/*
+ * Each pgd is only 2k, mapping 2Gb (local or global). If we're in global
+ * space drop the top bit before indexing the pgd.
+ */
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+#define pgd_index(address)     ((((address) & ~0x80000000) >> PGDIR_SHIFT) \
+                                                       & (PTRS_PER_PGD-1))
+#else
+#define pgd_index(address)     (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+#endif
+
+#define pgd_offset(mm, address)        ((mm)->pgd + pgd_index(address))
+
+#define pgd_offset_k(address)  pgd_offset(&init_mm, address)
+
+#define pmd_index(address)     (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+
+/* Find an entry in the second-level page table.. */
+#if !defined(CONFIG_HUGETLB_PAGE)
+  /* all pages are of size (1 << PAGE_SHIFT), so no need to read 1st level pt */
+# define pte_index(pmd, address) \
+       (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#else
+  /* some pages are huge, so read 1st level pt to find out */
+# define pte_index(pmd, address) \
+       (((address) >> pmd_page_shift(pmd)) & (pmd_num_ptrs(pmd) - 1))
+#endif
+#define pte_offset_kernel(dir, address) \
+       ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(*(dir), address))
+#define pte_offset_map(dir, address)           pte_offset_kernel(dir, address)
+#define pte_offset_map_nested(dir, address)    pte_offset_kernel(dir, address)
+
+#define pte_unmap(pte)         do { } while (0)
+#define pte_unmap_nested(pte)  do { } while (0)
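+
+/*
+ * Illustrative usage sketch: with the pmd level folded away by
+ * pgtable-nopmd.h, a software walk of the kernel tables for an address
+ * looks like
+ *
+ *     pgd_t *pgd = pgd_offset_k(addr);
+ *     pmd_t *pmd = pmd_offset(pud_offset(pgd, addr), addr);
+ *     pte_t *pte = pte_offset_kernel(pmd, addr);
+ */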
+
+#define pte_ERROR(e) \
+       pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pgd_ERROR(e) \
+       pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Meta doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+                                   unsigned long address, pte_t *pte)
+{
+}
+
+/*
+ * Encode and decode a swap entry (must be !pte_none(e) && !pte_present(e))
+ * Since PAGE_PRESENT is bit 1, we can use the bits above that.
+ */
+#define __swp_type(x)                  (((x).val >> 1) & 0xff)
+#define __swp_offset(x)                        ((x).val >> 10)
+#define __swp_entry(type, offset)      ((swp_entry_t) { ((type) << 1) | \
+                                        ((offset) << 10) })
+#define __pte_to_swp_entry(pte)                ((swp_entry_t) { pte_val(pte) })
+#define __swp_entry_to_pte(x)          ((pte_t) { (x).val })
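+
+/*
+ * Illustrative usage sketch: the encoding round-trips as expected. For
+ * __swp_entry(5, 0x123), __swp_type() returns 5 and __swp_offset() returns
+ * 0x123, and the pte produced by __swp_entry_to_pte() is neither pte_none()
+ * nor pte_present(), as required above.
+ */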
+
+#define PTE_FILE_MAX_BITS      22
+#define pte_to_pgoff(x)                (pte_val(x) >> 10)
+#define pgoff_to_pte(x)                __pte(((x) << 10) | _PAGE_FILE)
+
+#define kern_addr_valid(addr)  (1)
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)                \
+       remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()   do { } while (0)
+
+extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+void paging_init(unsigned long mem_end);
+
+#ifdef CONFIG_METAG_META12
+/* This is a workaround for an issue in Meta 1 cores. These cores cache
+ * invalid entries in the TLB so we always need to flush whenever we add
+ * a new pte. Unfortunately we can only flush the whole TLB, not shoot down
+ * single entries, so this is sub-optimal. This implementation ensures that
+ * we will get a flush at the second attempt, so while we may still get
+ * repeated faults, we don't overflow the kernel stack handling them.
+ */
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+({                                                                       \
+       int __changed = !pte_same(*(__ptep), __entry);                    \
+       if (__changed) {                                                  \
+               set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
+       }                                                                 \
+       flush_tlb_page(__vma, __address);                                 \
+       __changed;                                                        \
+})
+#endif
+
+#include <asm-generic/pgtable.h>
+
+#endif /* __ASSEMBLY__ */
+#endif /* _METAG_PGTABLE_H */
diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h
new file mode 100644 (file)
index 0000000..9b029a7
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2005,2006,2007,2008 Imagination Technologies
+ */
+
+#ifndef __ASM_METAG_PROCESSOR_H
+#define __ASM_METAG_PROCESSOR_H
+
+#include <linux/atomic.h>
+
+#include <asm/page.h>
+#include <asm/ptrace.h>
+#include <asm/metag_regs.h>
+
+/*
+ * Default implementation of macro that returns current
+ * instruction pointer ("program counter").
+ */
+#define current_text_addr() ({ __label__ _l; _l: &&_l; })
+
+/* The task stops where the kernel starts */
+#define TASK_SIZE      PAGE_OFFSET
+/* Add an extra page of padding at the top of the stack for the guard page. */
+#define STACK_TOP      (TASK_SIZE - PAGE_SIZE)
+#define STACK_TOP_MAX  STACK_TOP
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define TASK_UNMAPPED_BASE     META_MEMORY_BASE
+
+typedef struct {
+       unsigned long seg;
+} mm_segment_t;
+
+#ifdef CONFIG_METAG_FPU
+struct meta_fpu_context {
+       TBICTXEXTFPU fpstate;
+       union {
+               struct {
+                       TBICTXEXTBB4 fx8_15;
+                       TBICTXEXTFPACC fpacc;
+               } fx8_15;
+               struct {
+                       TBICTXEXTFPACC fpacc;
+                       TBICTXEXTBB4 unused;
+               } nofx8_15;
+       } extfpstate;
+       bool needs_restore;
+};
+#else
+struct meta_fpu_context {};
+#endif
+
+#ifdef CONFIG_METAG_DSP
+struct meta_ext_context {
+       struct {
+               TBIEXTCTX ctx;
+               TBICTXEXTBB8 bb8;
+               TBIDUAL ax[TBICTXEXTAXX_BYTES / sizeof(TBIDUAL)];
+               TBICTXEXTHL2 hl2;
+               TBICTXEXTTDPR ext;
+               TBICTXEXTRP6 rp;
+       } regs;
+
+       /* DSPRAM A and B save areas. */
+       void *ram[2];
+
+       /* ECH encoded size of DSPRAM save areas. */
+       unsigned int ram_sz[2];
+};
+#else
+struct meta_ext_context {};
+#endif
+
+struct thread_struct {
+       PTBICTX kernel_context;
+       /* A copy of the user process Sig.SaveMask. */
+       unsigned int user_flags;
+       struct meta_fpu_context *fpu_context;
+       void __user *tls_ptr;
+       unsigned short int_depth;
+       unsigned short txdefr_failure;
+       struct meta_ext_context *dsp_context;
+};
+
+#define INIT_THREAD  { \
+       NULL,                   /* kernel_context */    \
+       0,                      /* user_flags */        \
+       NULL,                   /* fpu_context */       \
+       NULL,                   /* tls_ptr */           \
+       1,                      /* int_depth - we start in kernel */    \
+       0,                      /* txdefr_failure */    \
+       NULL,                   /* dsp_context */       \
+}
+
+/* This needs to be a #define because we reference 'current', which is not
+ * visible yet.
+ *
+ * Stack layout is as below.
+
+      argc            argument counter (integer)
+      argv[0]         program name (pointer)
+      argv[1...N]     program args (pointers)
+      argv[argc-1]    end of args (integer)
+      NULL
+      env[0...N]      environment variables (pointers)
+      NULL
+
+ */
+#define start_thread(regs, pc, usp) do {                                  \
+       unsigned int *argc = (unsigned int *) bprm->exec;                  \
+       set_fs(USER_DS);                                                   \
+       current->thread.int_depth = 1;                                     \
+       /* Force this process down to user land */                         \
+       regs->ctx.SaveMask = TBICTX_PRIV_BIT;                              \
+       regs->ctx.CurrPC = pc;                                             \
+       regs->ctx.AX[0].U0 = usp;                                          \
+       regs->ctx.DX[3].U1 = *((int *)argc);                    /* argc */ \
+       regs->ctx.DX[3].U0 = (int)((int *)argc + 1);            /* argv */ \
+       regs->ctx.DX[2].U1 = (int)((int *)argc +                           \
+                                  regs->ctx.DX[3].U1 + 2);     /* envp */ \
+       regs->ctx.DX[2].U0 = 0;                            /* rtld_fini */ \
+} while (0)
+
+/* Forward declaration, a strange C thing */
+struct task_struct;
+
+/* Free all resources held by a thread. */
+static inline void release_thread(struct task_struct *dead_task)
+{
+}
+
+#define copy_segments(tsk, mm)         do { } while (0)
+#define release_segments(mm)           do { } while (0)
+
+extern void exit_thread(void);
+
+/*
+ * Return saved PC of a blocked thread.
+ */
+#define        thread_saved_pc(tsk)    \
+       ((unsigned long)(tsk)->thread.kernel_context->CurrPC)
+#define thread_saved_sp(tsk)   \
+       ((unsigned long)(tsk)->thread.kernel_context->AX[0].U0)
+#define thread_saved_fp(tsk)   \
+       ((unsigned long)(tsk)->thread.kernel_context->AX[1].U0)
+
+unsigned long get_wchan(struct task_struct *p);
+
+#define        KSTK_EIP(tsk)   ((tsk)->thread.kernel_context->CurrPC)
+#define        KSTK_ESP(tsk)   ((tsk)->thread.kernel_context->AX[0].U0)
+
+#define user_stack_pointer(regs)        ((regs)->ctx.AX[0].U0)
+
+#define cpu_relax()     barrier()
+
+extern void setup_priv(void);
+
+static inline unsigned int hard_processor_id(void)
+{
+       unsigned int id;
+
+       asm volatile ("MOV      %0, TXENABLE\n"
+                     "AND      %0, %0, %1\n"
+                     "LSR      %0, %0, %2\n"
+                     : "=&d" (id)
+                     : "I" (TXENABLE_THREAD_BITS),
+                       "K" (TXENABLE_THREAD_S)
+                     );
+
+       return id;
+}
+
+#define OP3_EXIT       0
+
+#define HALT_OK                0
+#define HALT_PANIC     -1
+
+/*
+ * Halt (stop) the hardware thread. This instruction sequence is the
+ * standard way to cause a Meta hardware thread to exit. The exit code
+ * is pushed onto the stack, where it is interpreted by the debug adapter.
+ */
+static inline void hard_processor_halt(int exit_code)
+{
+       asm volatile ("MOV      D1Ar1, %0\n"
+                     "MOV      D0Ar6, %1\n"
+                     "MSETL    [A0StP],D0Ar6,D0Ar4,D0Ar2\n"
+                     "1:\n"
+                     "SWITCH   #0xC30006\n"
+                     "B                1b\n"
+                     : : "r" (exit_code), "K" (OP3_EXIT));
+}
+
+/* Set these hooks to call SoC specific code to restart/halt/power off. */
+extern void (*soc_restart)(char *cmd);
+extern void (*soc_halt)(void);
+
+extern void show_trace(struct task_struct *tsk, unsigned long *sp,
+                      struct pt_regs *regs);
+
+#endif
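The stack layout described above is consumed by the start_thread() macro: argc sits at the stack pointer, followed by the argv[] array, its NULL terminator, and then envp[]. A minimal user-space sketch of the same pointer arithmetic, using a fabricated stack purely for illustration (nothing in this snippet is part of the kernel sources):

#include <stdio.h>

int main(void)
{
	/* A fake initial stack: argc, argv[0..argc-1], NULL, envp[0..], NULL */
	unsigned long stack[] = {
		2,                          /* argc                    */
		(unsigned long) "prog",     /* argv[0]                 */
		(unsigned long) "arg1",     /* argv[1]                 */
		0,                          /* argv[] NULL terminator  */
		(unsigned long) "HOME=/",   /* envp[0]                 */
		0,                          /* envp[] NULL terminator  */
	};
	unsigned long *sp = stack;

	unsigned long argc  = *sp;            /* -> DX[3].U1 in start_thread */
	unsigned long *argv = sp + 1;         /* -> DX[3].U0                 */
	unsigned long *envp = sp + argc + 2;  /* -> DX[2].U1: skip argc,
						    argv[] and its NULL       */

	printf("argc=%lu argv[0]=%s envp[0]=%s\n",
	       argc, (char *)argv[0], (char *)envp[0]);
	return 0;
}

Running it prints "argc=2 argv[0]=prog envp[0]=HOME=/", mirroring how DX[3].U1, DX[3].U0 and DX[2].U1 are filled in.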
diff --git a/arch/metag/include/asm/prom.h b/arch/metag/include/asm/prom.h
new file mode 100644 (file)
index 0000000..d2aa35d
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ *  arch/metag/include/asm/prom.h
+ *
+ *  Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ *  Based on ARM version:
+ *  Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#ifndef __ASM_METAG_PROM_H
+#define __ASM_METAG_PROM_H
+
+#include <asm/setup.h>
+#define HAVE_ARCH_DEVTREE_FIXUPS
+
+extern struct machine_desc *setup_machine_fdt(void *dt);
+extern void copy_fdt(void);
+
+#endif /* __ASM_METAG_PROM_H */
diff --git a/arch/metag/include/asm/ptrace.h b/arch/metag/include/asm/ptrace.h
new file mode 100644 (file)
index 0000000..fcabc18
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef _METAG_PTRACE_H
+#define _METAG_PTRACE_H
+
+#include <linux/compiler.h>
+#include <uapi/asm/ptrace.h>
+#include <asm/tbx.h>
+
+#ifndef __ASSEMBLY__
+
+/* this struct defines the way the registers are stored on the
+   stack during a system call. */
+
+struct pt_regs {
+       TBICTX ctx;
+       TBICTXEXTCB0 extcb0[5];
+};
+
+#define user_mode(regs) (((regs)->ctx.SaveMask & TBICTX_PRIV_BIT) > 0)
+
+#define instruction_pointer(regs) ((unsigned long)(regs)->ctx.CurrPC)
+#define profile_pc(regs) instruction_pointer(regs)
+
+#define task_pt_regs(task) \
+       ((struct pt_regs *)(task_stack_page(task) + \
+                           sizeof(struct thread_info)))
+
+#define current_pt_regs() \
+       ((struct pt_regs *)((char *)current_thread_info() + \
+                           sizeof(struct thread_info)))
+
+int syscall_trace_enter(struct pt_regs *regs);
+void syscall_trace_leave(struct pt_regs *regs);
+
+/* copy a struct user_gp_regs out to user */
+int metag_gp_regs_copyout(const struct pt_regs *regs,
+                         unsigned int pos, unsigned int count,
+                         void *kbuf, void __user *ubuf);
+/* copy a struct user_gp_regs in from user */
+int metag_gp_regs_copyin(struct pt_regs *regs,
+                        unsigned int pos, unsigned int count,
+                        const void *kbuf, const void __user *ubuf);
+/* copy a struct user_cb_regs out to user */
+int metag_cb_regs_copyout(const struct pt_regs *regs,
+                         unsigned int pos, unsigned int count,
+                         void *kbuf, void __user *ubuf);
+/* copy a struct user_cb_regs in from user */
+int metag_cb_regs_copyin(struct pt_regs *regs,
+                        unsigned int pos, unsigned int count,
+                        const void *kbuf, const void __user *ubuf);
+/* copy a struct user_rp_state out to user */
+int metag_rp_state_copyout(const struct pt_regs *regs,
+                          unsigned int pos, unsigned int count,
+                          void *kbuf, void __user *ubuf);
+/* copy a struct user_rp_state in from user */
+int metag_rp_state_copyin(struct pt_regs *regs,
+                         unsigned int pos, unsigned int count,
+                         const void *kbuf, const void __user *ubuf);
+
+#endif /* __ASSEMBLY__ */
+#endif /* _METAG_PTRACE_H */
diff --git a/arch/metag/include/asm/setup.h b/arch/metag/include/asm/setup.h
new file mode 100644 (file)
index 0000000..e13083b
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef _ASM_METAG_SETUP_H
+#define _ASM_METAG_SETUP_H
+
+#include <uapi/asm/setup.h>
+
+void per_cpu_trap_init(unsigned long);
+extern void __init dump_machine_table(void);
+#endif /* _ASM_METAG_SETUP_H */
diff --git a/arch/metag/include/asm/smp.h b/arch/metag/include/asm/smp.h
new file mode 100644 (file)
index 0000000..e0373f8
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef __ASM_SMP_H
+#define __ASM_SMP_H
+
+#include <linux/cpumask.h>
+
+#define raw_smp_processor_id() (current_thread_info()->cpu)
+
+enum ipi_msg_type {
+       IPI_CALL_FUNC,
+       IPI_CALL_FUNC_SINGLE,
+       IPI_RESCHEDULE,
+};
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
+
+asmlinkage void secondary_start_kernel(void);
+
+extern void secondary_startup(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern void __cpu_die(unsigned int cpu);
+extern int __cpu_disable(void);
+extern void cpu_die(void);
+#endif
+
+extern void smp_init_cpus(void);
+#endif /* __ASM_SMP_H */
diff --git a/arch/metag/include/asm/sparsemem.h b/arch/metag/include/asm/sparsemem.h
new file mode 100644 (file)
index 0000000..03fe255
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef __ASM_METAG_SPARSEMEM_H
+#define __ASM_METAG_SPARSEMEM_H
+
+/*
+ * SECTION_SIZE_BITS           2^N: how big each section will be
+ * MAX_PHYSADDR_BITS           2^N: how much physical address space we have
+ * MAX_PHYSMEM_BITS            2^N: how much memory we can have in that space
+ */
+#define SECTION_SIZE_BITS      26
+#define MAX_PHYSADDR_BITS      32
+#define MAX_PHYSMEM_BITS       32
+
+#endif /* __ASM_METAG_SPARSEMEM_H */
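With the values above, each sparsemem section covers 2^26 bytes (64 MiB) of a 2^32-byte (4 GiB) maximum physical address space, i.e. at most 2^(32-26) = 64 sections.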
diff --git a/arch/metag/include/asm/spinlock.h b/arch/metag/include/asm/spinlock.h
new file mode 100644 (file)
index 0000000..86a7cf3
--- /dev/null
@@ -0,0 +1,22 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+#ifdef CONFIG_METAG_ATOMICITY_LOCK1
+#include <asm/spinlock_lock1.h>
+#else
+#include <asm/spinlock_lnkget.h>
+#endif
+
+#define arch_spin_unlock_wait(lock) \
+       do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+
+#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
+
+#define        arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define        arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)  cpu_relax()
+#define arch_read_relax(lock)  cpu_relax()
+#define arch_write_relax(lock) cpu_relax()
+
+#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/metag/include/asm/spinlock_lnkget.h b/arch/metag/include/asm/spinlock_lnkget.h
new file mode 100644 (file)
index 0000000..ad8436f
--- /dev/null
@@ -0,0 +1,249 @@
+#ifndef __ASM_SPINLOCK_LNKGET_H
+#define __ASM_SPINLOCK_LNKGET_H
+
+/*
+ * None of these asm statements clobber memory as LNKSET writes around
+ * the cache so the memory it modifies cannot safely be read by any means
+ * other than these accessors.
+ */
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+       int ret;
+
+       asm volatile ("LNKGETD  %0, [%1]\n"
+                     "TST      %0, #1\n"
+                     "MOV      %0, #1\n"
+                     "XORZ      %0, %0, %0\n"
+                     : "=&d" (ret)
+                     : "da" (&lock->lock)
+                     : "cc");
+       return ret;
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+       int tmp;
+
+       asm volatile ("1:     LNKGETD %0,[%1]\n"
+                     "       TST     %0, #1\n"
+                     "       ADD     %0, %0, #1\n"
+                     "       LNKSETDZ [%1], %0\n"
+                     "       BNZ     1b\n"
+                     "       DEFR    %0, TXSTAT\n"
+                     "       ANDT    %0, %0, #HI(0x3f000000)\n"
+                     "       CMPT    %0, #HI(0x02000000)\n"
+                     "       BNZ     1b\n"
+                     : "=&d" (tmp)
+                     : "da" (&lock->lock)
+                     : "cc");
+
+       smp_mb();
+}
+
+/* Returns 0 if failed to acquire lock */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+       int tmp;
+
+       asm volatile ("       LNKGETD %0,[%1]\n"
+                     "       TST     %0, #1\n"
+                     "       ADD     %0, %0, #1\n"
+                     "       LNKSETDZ [%1], %0\n"
+                     "       BNZ     1f\n"
+                     "       DEFR    %0, TXSTAT\n"
+                     "       ANDT    %0, %0, #HI(0x3f000000)\n"
+                     "       CMPT    %0, #HI(0x02000000)\n"
+                     "       MOV     %0, #1\n"
+                     "1:     XORNZ   %0, %0, %0\n"
+                     : "=&d" (tmp)
+                     : "da" (&lock->lock)
+                     : "cc");
+
+       smp_mb();
+
+       return tmp;
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+       smp_mb();
+
+       asm volatile ("       SETD    [%0], %1\n"
+                     :
+                     : "da" (&lock->lock), "da" (0)
+                     : "memory");
+}
+
+/*
+ * RWLOCKS
+ *
+ *
+ * Write locks are easy - we just set bit 31.  When unlocking, we can
+ * just write zero since the lock is exclusively held.
+ */
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+       int tmp;
+
+       asm volatile ("1:     LNKGETD %0,[%1]\n"
+                     "       CMP     %0, #0\n"
+                     "       ADD     %0, %0, %2\n"
+                     "       LNKSETDZ [%1], %0\n"
+                     "       BNZ     1b\n"
+                     "       DEFR    %0, TXSTAT\n"
+                     "       ANDT    %0, %0, #HI(0x3f000000)\n"
+                     "       CMPT    %0, #HI(0x02000000)\n"
+                     "       BNZ     1b\n"
+                     : "=&d" (tmp)
+                     : "da" (&rw->lock), "bd" (0x80000000)
+                     : "cc");
+
+       smp_mb();
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+       int tmp;
+
+       asm volatile ("       LNKGETD %0,[%1]\n"
+                     "       CMP     %0, #0\n"
+                     "       ADD     %0, %0, %2\n"
+                     "       LNKSETDZ [%1], %0\n"
+                     "       BNZ     1f\n"
+                     "       DEFR    %0, TXSTAT\n"
+                     "       ANDT    %0, %0, #HI(0x3f000000)\n"
+                     "       CMPT    %0, #HI(0x02000000)\n"
+                     "       MOV     %0,#1\n"
+                     "1:     XORNZ   %0, %0, %0\n"
+                     : "=&d" (tmp)
+                     : "da" (&rw->lock), "bd" (0x80000000)
+                     : "cc");
+
+       smp_mb();
+
+       return tmp;
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+       smp_mb();
+
+       asm volatile ("       SETD    [%0], %1\n"
+                     :
+                     : "da" (&rw->lock), "da" (0)
+                     : "memory");
+}
+
+/* write_can_lock - would write_trylock() succeed? */
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
+{
+       int ret;
+
+       asm volatile ("LNKGETD  %0, [%1]\n"
+                     "CMP      %0, #0\n"
+                     "MOV      %0, #1\n"
+                     "XORNZ     %0, %0, %0\n"
+                     : "=&d" (ret)
+                     : "da" (&rw->lock)
+                     : "cc");
+       return ret;
+}
+
+/*
+ * Read locks are a bit more hairy:
+ *  - Exclusively load the lock value.
+ *  - Increment it.
+ *  - Store new lock value if positive, and we still own this location.
+ *    If the value is negative, we've already failed.
+ *  - If we failed to store the value, we want a negative result.
+ *  - If we failed, try again.
+ * Unlocking is similarly hairy.  We may have multiple read locks
+ * currently active.  However, we know we won't have any write
+ * locks.
+ */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+       int tmp;
+
+       asm volatile ("1:     LNKGETD %0,[%1]\n"
+                     "       ADDS    %0, %0, #1\n"
+                     "       LNKSETDPL [%1], %0\n"
+                     "       BMI     1b\n"
+                     "       DEFR    %0, TXSTAT\n"
+                     "       ANDT    %0, %0, #HI(0x3f000000)\n"
+                     "       CMPT    %0, #HI(0x02000000)\n"
+                     "       BNZ     1b\n"
+                     : "=&d" (tmp)
+                     : "da" (&rw->lock)
+                     : "cc");
+
+       smp_mb();
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+       int tmp;
+
+       smp_mb();
+
+       asm volatile ("1:     LNKGETD %0,[%1]\n"
+                     "       SUB     %0, %0, #1\n"
+                     "       LNKSETD [%1], %0\n"
+                     "       DEFR    %0, TXSTAT\n"
+                     "       ANDT    %0, %0, #HI(0x3f000000)\n"
+                     "       CMPT    %0, #HI(0x02000000)\n"
+                     "       BNZ     1b\n"
+                     : "=&d" (tmp)
+                     : "da" (&rw->lock)
+                     : "cc", "memory");
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+       int tmp;
+
+       asm volatile ("       LNKGETD %0,[%1]\n"
+                     "       ADDS    %0, %0, #1\n"
+                     "       LNKSETDPL [%1], %0\n"
+                     "       BMI     1f\n"
+                     "       DEFR    %0, TXSTAT\n"
+                     "       ANDT    %0, %0, #HI(0x3f000000)\n"
+                     "       CMPT    %0, #HI(0x02000000)\n"
+                     "       MOV     %0,#1\n"
+                     "       BZ      2f\n"
+                     "1:     MOV     %0,#0\n"
+                     "2:\n"
+                     : "=&d" (tmp)
+                     : "da" (&rw->lock)
+                     : "cc");
+
+       smp_mb();
+
+       return tmp;
+}
+
+/* read_can_lock - would read_trylock() succeed? */
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
+{
+       int tmp;
+
+       asm volatile ("LNKGETD  %0, [%1]\n"
+                     "CMP      %0, %2\n"
+                     "MOV      %0, #1\n"
+                     "XORZ     %0, %0, %0\n"
+                     : "=&d" (tmp)
+                     : "da" (&rw->lock), "bd" (0x80000000)
+                     : "cc");
+       return tmp;
+}
+
+#define        arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define        arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)  cpu_relax()
+#define arch_read_relax(lock)  cpu_relax()
+#define arch_write_relax(lock) cpu_relax()
+
+#endif /* __ASM_SPINLOCK_LNKGET_H */
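The LNKGETD/LNKSETDZ pairs above are Meta's load-linked/store-conditional primitive; the following DEFR TXSTAT / ANDT / CMPT sequence checks whether the conditional store actually landed, retrying if another thread got in first. Purely as an analogue of arch_spin_lock()'s control flow, here is a sketch in portable C11 atomics; it is not a metag implementation, since the real code must use LNKSET because it writes around the cache:

#include <stdatomic.h>

typedef struct { _Atomic unsigned int lock; } demo_spinlock_t;

void demo_spin_lock(demo_spinlock_t *l)
{
	unsigned int old;

	for (;;) {
		/* LNKGETD + TST #1: spin while the low bit says "locked" */
		old = atomic_load_explicit(&l->lock, memory_order_relaxed);
		if (old & 1)
			continue;
		/* ADD #1 + LNKSETDZ + TXSTAT check: publish old + 1, and
		 * retry if the conditional store did not succeed.  The
		 * acquire ordering stands in for the trailing smp_mb(). */
		if (atomic_compare_exchange_weak_explicit(&l->lock, &old,
							  old + 1,
							  memory_order_acquire,
							  memory_order_relaxed))
			return;
	}
}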
diff --git a/arch/metag/include/asm/spinlock_lock1.h b/arch/metag/include/asm/spinlock_lock1.h
new file mode 100644 (file)
index 0000000..c630444
--- /dev/null
@@ -0,0 +1,184 @@
+#ifndef __ASM_SPINLOCK_LOCK1_H
+#define __ASM_SPINLOCK_LOCK1_H
+
+#include <asm/bug.h>
+#include <asm/global_lock.h>
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+       int ret;
+
+       barrier();
+       ret = lock->lock;
+       WARN_ON(ret != 0 && ret != 1);
+       return ret;
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+       unsigned int we_won = 0;
+       unsigned long flags;
+
+again:
+       __global_lock1(flags);
+       if (lock->lock == 0) {
+               fence();
+               lock->lock = 1;
+               we_won = 1;
+       }
+       __global_unlock1(flags);
+       if (we_won == 0)
+               goto again;
+       WARN_ON(lock->lock != 1);
+}
+
+/* Returns 0 if failed to acquire lock */
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+       unsigned long flags;
+       unsigned int ret;
+
+       __global_lock1(flags);
+       ret = lock->lock;
+       if (ret == 0) {
+               fence();
+               lock->lock = 1;
+       }
+       __global_unlock1(flags);
+       return (ret == 0);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+       barrier();
+       WARN_ON(!lock->lock);
+       lock->lock = 0;
+}
+
+/*
+ * RWLOCKS
+ *
+ *
+ * Write locks are easy - we just set bit 31.  When unlocking, we can
+ * just write zero since the lock is exclusively held.
+ */
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+       unsigned long flags;
+       unsigned int we_won = 0;
+
+again:
+       __global_lock1(flags);
+       if (rw->lock == 0) {
+               fence();
+               rw->lock = 0x80000000;
+               we_won = 1;
+       }
+       __global_unlock1(flags);
+       if (we_won == 0)
+               goto again;
+       WARN_ON(rw->lock != 0x80000000);
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+       unsigned long flags;
+       unsigned int ret;
+
+       __global_lock1(flags);
+       ret = rw->lock;
+       if (ret == 0) {
+               fence();
+               rw->lock = 0x80000000;
+       }
+       __global_unlock1(flags);
+
+       return (ret == 0);
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+       barrier();
+       WARN_ON(rw->lock != 0x80000000);
+       rw->lock = 0;
+}
+
+/* write_can_lock - would write_trylock() succeed? */
+static inline int arch_write_can_lock(arch_rwlock_t *rw)
+{
+       unsigned int ret;
+
+       barrier();
+       ret = rw->lock;
+       return (ret == 0);
+}
+
+/*
+ * Read locks are a bit more hairy:
+ *  - Exclusively load the lock value.
+ *  - Increment it.
+ *  - Store new lock value if positive, and we still own this location.
+ *    If the value is negative, we've already failed.
+ *  - If we failed to store the value, we want a negative result.
+ *  - If we failed, try again.
+ * Unlocking is similarly hairy.  We may have multiple read locks
+ * currently active.  However, we know we won't have any write
+ * locks.
+ */
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+       unsigned long flags;
+       unsigned int we_won = 0, ret;
+
+again:
+       __global_lock1(flags);
+       ret = rw->lock;
+       if (ret < 0x80000000) {
+               fence();
+               rw->lock = ret + 1;
+               we_won = 1;
+       }
+       __global_unlock1(flags);
+       if (!we_won)
+               goto again;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+       unsigned long flags;
+       unsigned int ret;
+
+       __global_lock1(flags);
+       fence();
+       ret = rw->lock--;
+       __global_unlock1(flags);
+       WARN_ON(ret == 0);
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+       unsigned long flags;
+       unsigned int ret;
+
+       __global_lock1(flags);
+       ret = rw->lock;
+       if (ret < 0x80000000) {
+               fence();
+               rw->lock = ret + 1;
+       }
+       __global_unlock1(flags);
+       return (ret < 0x80000000);
+}
+
+/* read_can_lock - would read_trylock() succeed? */
+static inline int arch_read_can_lock(arch_rwlock_t *rw)
+{
+       unsigned int ret;
+
+       barrier();
+       ret = rw->lock;
+       return (ret < 0x80000000);
+}
+
+#endif /* __ASM_SPINLOCK_LOCK1_H */
diff --git a/arch/metag/include/asm/spinlock_types.h b/arch/metag/include/asm/spinlock_types.h
new file mode 100644 (file)
index 0000000..b763914
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef _ASM_METAG_SPINLOCK_TYPES_H
+#define _ASM_METAG_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+       volatile unsigned int lock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED      { 0 }
+
+typedef struct {
+       volatile unsigned int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED                { 0 }
+
+#endif /* _ASM_METAG_SPINLOCK_TYPES_H */
diff --git a/arch/metag/include/asm/stacktrace.h b/arch/metag/include/asm/stacktrace.h
new file mode 100644 (file)
index 0000000..2830a0f
--- /dev/null
@@ -0,0 +1,20 @@
+#ifndef __ASM_STACKTRACE_H
+#define __ASM_STACKTRACE_H
+
+struct stackframe {
+       unsigned long fp;
+       unsigned long sp;
+       unsigned long lr;
+       unsigned long pc;
+};
+
+struct metag_frame {
+       unsigned long fp;
+       unsigned long lr;
+};
+
+extern int unwind_frame(struct stackframe *frame);
+extern void walk_stackframe(struct stackframe *frame,
+                           int (*fn)(struct stackframe *, void *), void *data);
+
+#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/metag/include/asm/string.h b/arch/metag/include/asm/string.h
new file mode 100644 (file)
index 0000000..53e3806
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef _METAG_STRING_H_
+#define _METAG_STRING_H_
+
+#define __HAVE_ARCH_MEMSET
+extern void *memset(void *__s, int __c, size_t __count);
+
+#define __HAVE_ARCH_MEMCPY
+void *memcpy(void *__to, __const__ void *__from, size_t __n);
+
+#define __HAVE_ARCH_MEMMOVE
+extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#endif /* _METAG_STRING_H_ */
diff --git a/arch/metag/include/asm/switch.h b/arch/metag/include/asm/switch.h
new file mode 100644 (file)
index 0000000..1fd6a58
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _ASM_METAG_SWITCH_H
+#define _ASM_METAG_SWITCH_H
+
+/* metag SWITCH codes */
+#define __METAG_SW_PERM_BREAK  0x400002        /* compiled in breakpoint */
+#define __METAG_SW_SYS_LEGACY  0x440000        /* legacy system calls */
+#define __METAG_SW_SYS         0x440001        /* system calls */
+
+/* metag SWITCH instruction encoding */
+#define __METAG_SW_ENCODING(TYPE)      (0xaf000000 | (__METAG_SW_##TYPE))
+
+#endif /* _ASM_METAG_SWITCH_H */
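As a worked example of the encoding macro: __METAG_SW_ENCODING(SYS) expands to 0xaf000000 | 0x440001 = 0xaf440001, the instruction word that the syscall entry path (see asm/syscall.h below) expects to find at CurrPC - 4.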
diff --git a/arch/metag/include/asm/syscall.h b/arch/metag/include/asm/syscall.h
new file mode 100644 (file)
index 0000000..24fc979
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Access to user system call parameters and results
+ *
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ *
+ * See asm-generic/syscall.h for descriptions of what we must do here.
+ */
+
+#ifndef _ASM_METAG_SYSCALL_H
+#define _ASM_METAG_SYSCALL_H
+
+#include <linux/sched.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+
+#include <asm/switch.h>
+
+static inline long syscall_get_nr(struct task_struct *task,
+                                 struct pt_regs *regs)
+{
+       unsigned long insn;
+
+       /*
+        * FIXME there's no way to find out how we got here other than to
+        * examine the memory at the PC to see if it is a syscall
+        * SWITCH instruction.
+        */
+       if (get_user(insn, (unsigned long *)(regs->ctx.CurrPC - 4)))
+               return -1;
+
+       if (insn == __METAG_SW_ENCODING(SYS))
+               return regs->ctx.DX[0].U1;
+       else
+               return -1L;
+}
+
+static inline void syscall_rollback(struct task_struct *task,
+                                   struct pt_regs *regs)
+{
+       /* do nothing */
+}
+
+static inline long syscall_get_error(struct task_struct *task,
+                                    struct pt_regs *regs)
+{
+       unsigned long error = regs->ctx.DX[0].U0;
+       return IS_ERR_VALUE(error) ? error : 0;
+}
+
+static inline long syscall_get_return_value(struct task_struct *task,
+                                           struct pt_regs *regs)
+{
+       return regs->ctx.DX[0].U0;
+}
+
+static inline void syscall_set_return_value(struct task_struct *task,
+                                           struct pt_regs *regs,
+                                           int error, long val)
+{
+       regs->ctx.DX[0].U0 = (long) error ?: val;
+}
+
+static inline void syscall_get_arguments(struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned int i, unsigned int n,
+                                        unsigned long *args)
+{
+       unsigned int reg, j;
+       BUG_ON(i + n > 6);
+
+       for (j = i, reg = 6 - i; j < (i + n); j++, reg--) {
+               if (reg % 2)
+                       args[j] = regs->ctx.DX[(reg + 1) / 2].U0;
+               else
+                       args[j] = regs->ctx.DX[reg / 2].U1;
+       }
+}
+
+static inline void syscall_set_arguments(struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned int i, unsigned int n,
+                                        const unsigned long *args)
+{
+       unsigned int reg, j;
+       BUG_ON(i + n > 6);
+
+       for (j = i, reg = 6 - i; j < (i + n); j++, reg--) {
+               if (reg % 2)
+                       regs->ctx.DX[(reg + 1) / 2].U0 = args[j];
+               else
+                       regs->ctx.DX[reg / 2].U1 = args[j];
+       }
+}
+
+#define NR_syscalls __NR_syscalls
+
+/* generic syscall table */
+extern const void *sys_call_table[];
+
+#endif /* _ASM_METAG_SYSCALL_H */
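Both argument loops above walk the six syscall argument slots downwards from "register 6", splitting each 64-bit DX pair into its U0/U1 halves. A tiny standalone sketch of just that index arithmetic (illustration only, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned int j, reg;

	/* Same arithmetic as syscall_get_arguments() with i == 0, n == 6 */
	for (j = 0, reg = 6; j < 6; j++, reg--) {
		if (reg % 2)
			printf("arg%u -> DX[%u].U0\n", j, (reg + 1) / 2);
		else
			printf("arg%u -> DX[%u].U1\n", j, reg / 2);
	}
	return 0;
}

It prints the mapping arg0 -> DX[3].U1 down to arg5 -> DX[1].U0.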
diff --git a/arch/metag/include/asm/syscalls.h b/arch/metag/include/asm/syscalls.h
new file mode 100644 (file)
index 0000000..a02b955
--- /dev/null
@@ -0,0 +1,39 @@
+#ifndef _ASM_METAG_SYSCALLS_H
+#define _ASM_METAG_SYSCALLS_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+
+/* kernel/signal.c */
+#define sys_rt_sigreturn sys_rt_sigreturn
+asmlinkage long sys_rt_sigreturn(void);
+
+#include <asm-generic/syscalls.h>
+
+/* kernel/sys_metag.c */
+asmlinkage int sys_metag_setglobalbit(char __user *, int);
+asmlinkage void sys_metag_set_fpu_flags(unsigned int);
+asmlinkage int sys_metag_set_tls(void __user *);
+asmlinkage void *sys_metag_get_tls(void);
+
+asmlinkage long sys_truncate64_metag(const char __user *, unsigned long,
+                                    unsigned long);
+asmlinkage long sys_ftruncate64_metag(unsigned int, unsigned long,
+                                     unsigned long);
+asmlinkage long sys_fadvise64_64_metag(int, unsigned long, unsigned long,
+                                      unsigned long, unsigned long, int);
+asmlinkage long sys_readahead_metag(int, unsigned long, unsigned long, size_t);
+asmlinkage ssize_t sys_pread64_metag(unsigned long, char __user *, size_t,
+                                    unsigned long, unsigned long);
+asmlinkage ssize_t sys_pwrite64_metag(unsigned long, char __user *, size_t,
+                                     unsigned long, unsigned long);
+asmlinkage long sys_sync_file_range_metag(int, unsigned long, unsigned long,
+                                         unsigned long, unsigned long,
+                                         unsigned int);
+
+int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+                   int syscall);
+
+#endif /* _ASM_METAG_SYSCALLS_H */
diff --git a/arch/metag/include/asm/tbx.h b/arch/metag/include/asm/tbx.h
new file mode 100644 (file)
index 0000000..287b36f
--- /dev/null
@@ -0,0 +1,1425 @@
+/*
+ * asm/tbx.h
+ *
+ * Copyright (C) 2000-2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Thread binary interface header
+ */
+
+#ifndef _ASM_METAG_TBX_H_
+#define _ASM_METAG_TBX_H_
+
+/* for CACHEW_* values */
+#include <asm/metag_isa.h>
+/* for LINSYSEVENT_* addresses */
+#include <asm/metag_mem.h>
+
+#ifdef  TBI_1_4
+#ifndef TBI_MUTEXES_1_4
+#define TBI_MUTEXES_1_4
+#endif
+#ifndef TBI_SEMAPHORES_1_4
+#define TBI_SEMAPHORES_1_4
+#endif
+#ifndef TBI_ASYNC_SWITCH_1_4
+#define TBI_ASYNC_SWITCH_1_4
+#endif
+#ifndef TBI_FASTINT_1_4
+#define TBI_FASTINT_1_4
+#endif
+#endif
+
+
+/* Id values in the TBI system describe a segment using an arbitrary
+   integer value and flags in the bottom 8 bits; the SIGPOLL value is
+   used in cases where control over blocking or polling behaviour is
+   needed. */
+#define TBID_SIGPOLL_BIT    0x02 /* Set bit in an Id value to poll vs block */
+/* Extended segment identifiers use strings in the string table */
+#define TBID_IS_SEGSTR( Id ) (((Id) & (TBID_SEGTYPE_BITS>>1)) == 0)
+
+/* Segment identifiers contain the following related bit-fields */
+#define TBID_SEGTYPE_BITS   0x0F /* One of the predefined segment types */
+#define TBID_SEGTYPE_S      0
+#define TBID_SEGSCOPE_BITS  0x30 /* Indicates the scope of the segment */
+#define TBID_SEGSCOPE_S     4
+#define TBID_SEGGADDR_BITS  0xC0 /* Indicates access possible via pGAddr */
+#define TBID_SEGGADDR_S     6
+
+/* Segments of memory can only really contain a few types of data */
+#define TBID_SEGTYPE_TEXT   0x02 /* Code segment */
+#define TBID_SEGTYPE_DATA   0x04 /* Data segment */
+#define TBID_SEGTYPE_STACK  0x06 /* Stack segment */
+#define TBID_SEGTYPE_HEAP   0x0A /* Heap segment */
+#define TBID_SEGTYPE_ROOT   0x0C /* Root block segments */
+#define TBID_SEGTYPE_STRING 0x0E /* String table segment */
+
+/* Segments have one of three possible scopes */
+#define TBID_SEGSCOPE_INIT     0 /* Temporary area for initialisation phase */
+#define TBID_SEGSCOPE_LOCAL    1 /* Private to this thread */
+#define TBID_SEGSCOPE_GLOBAL   2 /* Shared globally throughout the system */
+#define TBID_SEGSCOPE_SHARED   3 /* Limited sharing between local/global */
+
+/* For a segment specifier, a further field in two of the remaining bits
+   indicates the usefulness of the pGAddr field in the segment
+   descriptor. */
+#define TBID_SEGGADDR_NULL     0 /* pGAddr is NULL -> SEGSCOPE_(LOCAL|INIT) */
+#define TBID_SEGGADDR_READ     1 /* Only read    via pGAddr */
+#define TBID_SEGGADDR_WRITE    2 /* Full access  via pGAddr */
+#define TBID_SEGGADDR_EXEC     3 /* Only execute via pGAddr */
+
+/* The following values are common to both segment and signal Id value and
+   live in the top 8 bits of the Id values. */
+
+/* The ISTAT bit indicates whether segments are related to interrupt vs
+   background level interfaces; a thread can still handle all triggers at
+   either level, but can also split these up if it wants to. */
+#define TBID_ISTAT_BIT    0x01000000
+#define TBID_ISTAT_S      24
+
+/* Privilege needed to access a segment is indicated by the next bit.
+   
+   This bit is set to mirror the current privilege level when starting a
+   search for a segment - setting it yourself toggles the automatically
+   generated state which is only useful to emulate unprivileged behaviour
+   or access unprivileged areas of memory while at privileged level. */
+#define TBID_PSTAT_BIT    0x02000000
+#define TBID_PSTAT_S      25
+
+/* The top six bits of a signal/segment specifier identify a thread within
+   the system. This represents a segment's owner. */
+#define TBID_THREAD_BITS  0xFC000000
+#define TBID_THREAD_S     26
+
+/* Special thread id values */
+#define TBID_THREAD_NULL   (-32) /* Never matches any thread/segment id used */
+#define TBID_THREAD_GLOBAL (-31) /* Things global to all threads */
+#define TBID_THREAD_HOST   ( -1) /* Host interface */
+#define TBID_THREAD_EXTIO  (TBID_THREAD_HOST)   /* Host based ExtIO i/f */
+
+/* Virtual Id's are used for external thread interface structures or the
+   above special Id's */
+#define TBID_IS_VIRTTHREAD( Id ) ((Id) < 0)
+
+/* Real Id's are used for actual hardware threads that are local */
+#define TBID_IS_REALTHREAD( Id ) ((Id) >= 0)
+
+/* Generate a segment Id given Thread, Scope, and Type */
+#define TBID_SEG( Thread, Scope, Type )                           (\
+    ((Thread)<<TBID_THREAD_S) + ((Scope)<<TBID_SEGSCOPE_S) + (Type))
+
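For example, a local stack segment owned by hardware thread 2 would be given the Id TBID_SEG(2, TBID_SEGSCOPE_LOCAL, TBID_SEGTYPE_STACK) = (2 << 26) + (1 << 4) + 0x06 = 0x08000016.
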
+/* Generate a signal Id given Thread and SigNum */
+#define TBID_SIG( Thread, SigNum )                                        (\
+    ((Thread)<<TBID_THREAD_S) + ((SigNum)<<TBID_SIGNUM_S) + TBID_SIGNAL_BIT)
+
+/* Generate an Id that solely represents a thread - useful for cache ops */
+#define TBID_THD( Thread ) ((Thread)<<TBID_THREAD_S)
+#define TBID_THD_NULL      ((TBID_THREAD_NULL)  <<TBID_THREAD_S)
+#define TBID_THD_GLOBAL    ((TBID_THREAD_GLOBAL)<<TBID_THREAD_S)
+
+/* Common exception handler (see TBID_SIGNUM_XXF below) receives hardware
+   generated fault codes TBIXXF_SIGNUM_xxF in its SigNum parameter */
+#define TBIXXF_SIGNUM_IIF   0x01 /* General instruction fault */
+#define TBIXXF_SIGNUM_PGF   0x02 /* Privilege general fault */
+#define TBIXXF_SIGNUM_DHF   0x03 /* Data access watchpoint HIT */
+#define TBIXXF_SIGNUM_IGF   0x05 /* Code fetch general read failure */
+#define TBIXXF_SIGNUM_DGF   0x07 /* Data access general read/write fault */
+#define TBIXXF_SIGNUM_IPF   0x09 /* Code fetch page fault */
+#define TBIXXF_SIGNUM_DPF   0x0B /* Data access page fault */
+#define TBIXXF_SIGNUM_IHF   0x0D /* Instruction breakpoint HIT */
+#define TBIXXF_SIGNUM_DWF   0x0F /* Data access read-only fault */
+
+/* Hardware signals communicate events between processing levels within a
+   single thread: all the _xxF cases are exceptions and are routed via a
+   common exception handler, _SWx are software trap events and kicks including
+   __TBISignal generated kicks, and finally _TRx are hardware triggers */
+#define TBID_SIGNUM_SW0     0x00 /* SWITCH GROUP 0 - Per thread user */
+#define TBID_SIGNUM_SW1     0x01 /* SWITCH GROUP 1 - Per thread system */
+#define TBID_SIGNUM_SW2     0x02 /* SWITCH GROUP 2 - Internal global request */
+#define TBID_SIGNUM_SW3     0x03 /* SWITCH GROUP 3 - External global request */
+#ifdef TBI_1_4
+#define TBID_SIGNUM_FPE     0x04 /* Deferred exception - Any IEEE 754 exception */
+#define TBID_SIGNUM_FPD     0x05 /* Deferred exception - Denormal exception */
+/* Reserved 0x6 for a reserved deferred exception */
+#define TBID_SIGNUM_BUS     0x07 /* Deferred exception - Bus Error */
+/* Reserved 0x08-0x09 */
+#else
+/* Reserved 0x04-0x09 */
+#endif
+#define TBID_SIGNUM_SWS     0x0A /* KICK received with SigMask != 0 */
+#define TBID_SIGNUM_SWK     0x0B /* KICK received with SigMask == 0 */
+/* Reserved 0x0C-0x0F */
+#define TBID_SIGNUM_TRT     0x10 /* Timer trigger */
+#define TBID_SIGNUM_LWK     0x11 /* Low level kick (handler provided by TBI) */
+#define TBID_SIGNUM_XXF     0x12 /* Fault handler - receives ALL _xxF sigs */
+#ifdef TBI_1_4
+#define TBID_SIGNUM_DFR     0x13 /* Deferred Exception handler */
+#else
+#define TBID_SIGNUM_FPE     0x13 /* FPE Exception handler */
+#endif
+/* External trigger one group 0x14 to 0x17 - per thread */
+#define TBID_SIGNUM_TR1(Thread) (0x14+(Thread))
+#define TBID_SIGNUM_T10     0x14
+#define TBID_SIGNUM_T11     0x15
+#define TBID_SIGNUM_T12     0x16
+#define TBID_SIGNUM_T13     0x17
+/* External trigger two group 0x18 to 0x1b - per thread */
+#define TBID_SIGNUM_TR2(Thread) (0x18+(Thread))
+#define TBID_SIGNUM_T20     0x18
+#define TBID_SIGNUM_T21     0x19
+#define TBID_SIGNUM_T22     0x1A
+#define TBID_SIGNUM_T23     0x1B
+#define TBID_SIGNUM_TR3     0x1C /* External trigger N-4 (global) */
+#define TBID_SIGNUM_TR4     0x1D /* External trigger N-3 (global) */
+#define TBID_SIGNUM_TR5     0x1E /* External trigger N-2 (global) */
+#define TBID_SIGNUM_TR6     0x1F /* External trigger N-1 (global) */
+#define TBID_SIGNUM_MAX     0x1F
+
+/* Return the trigger register (TXMASK[I]/TXSTAT[I]) bits related to
+   each hardware signal; sometimes this is a many-to-one relationship. */
+#define TBI_TRIG_BIT(SigNum)                                      (\
+    ((SigNum) >= TBID_SIGNUM_TRT) ? 1<<((SigNum)-TBID_SIGNUM_TRT) :\
+    ( ((SigNum) == TBID_SIGNUM_SWS) ||                             \
+      ((SigNum) == TBID_SIGNUM_SWK)    ) ?                         \
+                         TXSTAT_KICK_BIT : TXSTATI_BGNDHALT_BIT    )
+
+/* Return the hardware trigger vector number for entries in the
+   HWVEC0EXT table that will generate the required internal trigger. */
+#define TBI_TRIG_VEC(SigNum)                                      (\
+    ((SigNum) >= TBID_SIGNUM_T10) ? ((SigNum)-TBID_SIGNUM_TRT) : -1)
+
+/* Default trigger masks for each thread at background/interrupt level */
+#define TBI_TRIGS_INIT( Thread )                           (\
+    TXSTAT_KICK_BIT + TBI_TRIG_BIT(TBID_SIGNUM_TR1(Thread)) )
+#define TBI_INTS_INIT( Thread )                            (\
+    TXSTAT_KICK_BIT + TXSTATI_BGNDHALT_BIT                  \
+                    + TBI_TRIG_BIT(TBID_SIGNUM_TR2(Thread)) )
+
+#ifndef __ASSEMBLY__
+/* A spin-lock location is a zero-initialised location in memory */
+typedef volatile int TBISPIN, *PTBISPIN;
+
+/* A kick location is a hardware location you can write to
+ * in order to cause a kick
+ */
+typedef volatile int *PTBIKICK;
+
+#if defined(METAC_1_0) || defined(METAC_1_1)
+/* Macro to perform a kick */
+#define TBI_KICK( pKick ) do { pKick[0] = 1; } while (0)
+#else
+/* #define METAG_LIN_VALUES before including machine.h if required */
+#ifdef LINSYSEVENT_WR_COMBINE_FLUSH
+/* Macro to perform a kick - write combiners must be flushed */
+#define TBI_KICK( pKick )                                                do {\
+    volatile int *pFlush = (volatile int *) LINSYSEVENT_WR_COMBINE_FLUSH;    \
+    pFlush[0] = 0;                                                           \
+    pKick[0]  = 1;                                                } while (0)
+#endif
+#endif /* if defined(METAC_1_0) || defined(METAC_1_1) */
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* 64-bit dual unit state value */
+typedef struct _tbidual_tag_ {
+    /* 32-bit value from a pair of registers in data or address units */
+    int U0, U1;
+} TBIDUAL, *PTBIDUAL;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBIDUAL */
+#define TBIDUAL_U0      (0)
+#define TBIDUAL_U1      (4)
+
+#define TBIDUAL_BYTES   (8)
+
+#define TBICTX_CRIT_BIT 0x0001  /* ASync state saved in TBICTX */
+#define TBICTX_SOFT_BIT 0x0002  /* Sync state saved in TBICTX (other bits 0) */
+#ifdef TBI_FASTINT_1_4
+#define TBICTX_FINT_BIT 0x0004  /* Using Fast Interrupts */
+#endif
+#define TBICTX_FPAC_BIT 0x0010  /* FPU state in TBICTX, FPU active on entry */
+#define TBICTX_XMCC_BIT 0x0020  /* Bit to identify a MECC task */
+#define TBICTX_CBUF_BIT 0x0040  /* Hardware catch buffer flag from TXSTATUS */
+#define TBICTX_CBRP_BIT 0x0080  /* Read pipeline dirty from TXDIVTIME */
+#define TBICTX_XDX8_BIT 0x0100  /* Saved DX.8 to DX.15 too */
+#define TBICTX_XAXX_BIT 0x0200  /* Save remaining AX registers to AX.7 */
+#define TBICTX_XHL2_BIT 0x0400  /* Saved hardware loop registers too */
+#define TBICTX_XTDP_BIT 0x0800  /* Saved DSP registers too */
+#define TBICTX_XEXT_BIT 0x1000  /* Set if TBICTX.Ext.Ctx contains extended
+                                   state save area, otherwise TBICTX.Ext.AX2
+                                   just holds normal A0.2 and A1.2 states */
+#define TBICTX_WAIT_BIT 0x2000  /* Causes wait for trigger - sticky toggle */
+#define TBICTX_XCBF_BIT 0x4000  /* Catch buffer or RD extracted into TBICTX */
+#define TBICTX_PRIV_BIT 0x8000  /* Set if system uses 'privileged' model */
+
+#ifdef METAC_1_0
+#define TBICTX_XAX3_BIT 0x0200  /* Saved AX.5 to AX.7 for XAXX */
+#define TBICTX_AX_REGS  5       /* Ax.0 to Ax.4 are core GP regs on CHORUS */
+#else
+#define TBICTX_XAX4_BIT 0x0200  /* Saved AX.4 to AX.7 for XAXX */
+#define TBICTX_AX_REGS  4       /* Default is Ax.0 to Ax.3 */
+#endif
+
+#ifdef TBI_1_4
+#define TBICTX_CFGFPU_FX16_BIT  0x00010000               /* Save FX.8 to FX.15 too */
+
+/* The METAC_CORE_ID_CONFIG field indicates omitted DSP resources */
+#define METAC_COREID_CFGXCTX_MASK( Value )                                 (\
+       ( (((Value & METAC_COREID_CFGDSP_BITS)>>                                \
+                    METAC_COREID_CFGDSP_S      ) == METAC_COREID_CFGDSP_MIN) ? \
+                ~(TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+                             \
+                  TBICTX_XAXX_BIT+TBICTX_XDX8_BIT ) : ~0U )                    )
+#endif
+
+/* Extended context state provides a standardised method for registering the
+   arguments required by __TBICtxSave to save the additional register states
+   currently in use by non general purpose code. The state of the __TBIExtCtx
+   variable in the static space of the thread forms an extension of the base
+   context of the thread.
+   
+   If ( __TBIExtCtx.Ctx.SaveMask == 0 ) then pExt is assumed to be NULL and
+   the empty state of  __TBIExtCtx is represented by the fact that
+   TBICTX.SaveMask does not have the bit TBICTX_XEXT_BIT set.
+   
+   If ( __TBIExtCtx.Ctx.SaveMask != 0 ) then pExt should point at a suitably
+   sized extended context save area (usually at the end of the stack space
+   allocated by the current routine). This space should allow for the
+   displaced state of A0.2 and A1.2 to be saved along with the other extended
+   states indicated via __TBIExtCtx.Ctx.SaveMask. */
+#ifndef __ASSEMBLY__
+typedef union _tbiextctx_tag_ {
+    long long Val;
+    TBIDUAL AX2;
+    struct _tbiextctxext_tag {
+#ifdef TBI_1_4
+        short DspramSizes;      /* DSPRAM sizes. Encoding varies between
+                                   TBICtxAlloc and the ECH scheme. */
+#else
+        short Reserved0;
+#endif
+        short SaveMask;         /* Flag bits for state saved */
+        PTBIDUAL pExt;          /* AX[2] state saved first plus Xxxx state */
+    
+    } Ctx;
+    
+} TBIEXTCTX, *PTBIEXTCTX;
+
+/* Automatic registration of extended context save for __TBINestInts */
+extern TBIEXTCTX __TBIExtCtx;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBIEXTCTX */
+#define TBIEXTCTX_AX2           (0)
+#define TBIEXTCTX_Ctx           (0)
+#define TBIEXTCTX_Ctx_SaveMask  (TBIEXTCTX_Ctx + 2)
+#define TBIEXTCTX_Ctx_pExt      (TBIEXTCTX_Ctx + 2 + 2)
+
+/* Extended context data size calculation constants */
+#define TBICTXEXT_BYTES          (8)
+#define TBICTXEXTBB8_BYTES     (8*8)
+#define TBICTXEXTAX3_BYTES     (3*8)
+#define TBICTXEXTAX4_BYTES     (4*8)
+#ifdef METAC_1_0
+#define TBICTXEXTAXX_BYTES     TBICTXEXTAX3_BYTES
+#else
+#define TBICTXEXTAXX_BYTES     TBICTXEXTAX4_BYTES
+#endif
+#define TBICTXEXTHL2_BYTES     (3*8)
+#define TBICTXEXTTDR_BYTES    (27*8)
+#define TBICTXEXTTDP_BYTES TBICTXEXTTDR_BYTES
+
+#ifdef TBI_1_4
+#define TBICTXEXTFX8_BYTES     (4*8)
+#define TBICTXEXTFPAC_BYTES    (1*4 + 2*2 + 4*8)
+#define TBICTXEXTFACF_BYTES    (3*8)
+#endif
+
+/* Maximum flag bits to be set via the TBICTX_EXTSET macro */
+#define TBICTXEXT_MAXBITS  (TBICTX_XEXT_BIT|                \
+                            TBICTX_XDX8_BIT|TBICTX_XAXX_BIT|\
+                            TBICTX_XHL2_BIT|TBICTX_XTDP_BIT )
+
+/* Maximum size of the extended context save area for current variant */
+#define TBICTXEXT_MAXBYTES (TBICTXEXT_BYTES+TBICTXEXTBB8_BYTES+\
+                         TBICTXEXTAXX_BYTES+TBICTXEXTHL2_BYTES+\
+                                            TBICTXEXTTDP_BYTES )
+
+#ifdef TBI_FASTINT_1_4
+/* Maximum flag bits to be set via the TBICTX_EXTSET macro */
+#define TBICTX2EXT_MAXBITS (TBICTX_XDX8_BIT|TBICTX_XAXX_BIT|\
+                            TBICTX_XHL2_BIT|TBICTX_XTDP_BIT )
+
+/* Maximum size of the extended context save area for current variant */
+#define TBICTX2EXT_MAXBYTES (TBICTXEXTBB8_BYTES+TBICTXEXTAXX_BYTES\
+                             +TBICTXEXTHL2_BYTES+TBICTXEXTTDP_BYTES )
+#endif
+
+/* Specify extended resources being used by the current routine; code must be
+   assembler generated to utilise extended resources-
+
+        MOV     D0xxx,A0StP             ; Perform alloca - routine should
+        ADD     A0StP,A0StP,#SaveSize   ; setup/use A0FrP to access locals
+        MOVT    D1xxx,#SaveMask         ; TBICTX_XEXT_BIT MUST be set
+        SETL    [A1GbP+#OG(___TBIExtCtx)],D0xxx,D1xxx
+        
+    NB: OG(___TBIExtCtx) is a special case supported for SETL/GETL operations
+        on 64-bit sized structures only; other accesses must be based on use
+        of OGA(___TBIExtCtx). 
+
+   At exit of routine-
+   
+        MOV     D0xxx,#0                ; Clear extended context save state
+        MOV     D1xxx,#0
+        SETL    [A1GbP+#OG(___TBIExtCtx)],D0xxx,D1xxx
+        SUB     A0StP,A0StP,#SaveSize   ; If original A0StP required
+        
+    NB: Both the setting and clearing of the whole __TBIExtCtx MUST be done
+        atomically in one 64-bit write operation.
+
+   For simple interrupt handling only via __TBINestInts there should be no
+   impact of the __TBIExtCtx system. If pre-emptive scheduling is being
+   performed however (assuming __TBINestInts has already been called earlier
+   on) then the following logic will correctly call __TBICtxSave if required
+   and clear out the currently selected background task-
+   
+        if ( __TBIExtCtx.Ctx.SaveMask & TBICTX_XEXT_BIT )
+        {
+            / * Store extended states in pCtx * /
+            State.Sig.SaveMask |= __TBIExtCtx.Ctx.SaveMask;
+        
+            (void) __TBICtxSave( State, (void *) __TBIExtCtx.Ctx.pExt );
+            __TBIExtCtx.Val   = 0;
+        }
+        
+    and when restoring task states call __TBICtxRestore-
+    
+        / * Restore state from pCtx * /
+        State.Sig.pCtx     = pCtx;
+        State.Sig.SaveMask = pCtx->SaveMask;
+
+        if ( State.Sig.SaveMask & TBICTX_XEXT_BIT )
+        {
+            / * Restore extended states from pCtx * /
+            __TBIExtCtx.Val = pCtx->Ext.Val;
+            
+            (void) __TBICtxRestore( State, (void *) __TBIExtCtx.Ctx.pExt );
+        }   
+   
+ */
+
+/* Critical thread state save area */
+#ifndef __ASSEMBLY__
+typedef struct _tbictx_tag_ {
+    /* TXSTATUS_FLAG_BITS and TXSTATUS_LSM_STEP_BITS from TXSTATUS */
+    short Flags;
+    /* Mask indicates any extended context state saved; 0 -> Never run */
+    short SaveMask;
+    /* Saved PC value */
+    int CurrPC;
+    /* Saved critical register states */
+    TBIDUAL DX[8];
+    /* Background control register states - for cores without catch buffer
+       base in DIVTIME the TXSTATUS bits RPVALID and RPMASK are stored with
+       the real state TXDIVTIME in CurrDIVTIME */
+    int CurrRPT, CurrBPOBITS, CurrMODE, CurrDIVTIME;
+    /* Saved AX register states */
+    TBIDUAL AX[2];
+    TBIEXTCTX Ext;
+    TBIDUAL AX3[TBICTX_AX_REGS-3];
+    
+    /* Any CBUF state to be restored by a handler return must be stored here.
+       Other extended state can be stored anywhere - see __TBICtxSave and
+       __TBICtxRestore. */
+    
+} TBICTX, *PTBICTX;
+
+#ifdef TBI_FASTINT_1_4
+typedef struct _tbictx2_tag_ {
+    TBIDUAL AX[2];    /* AU.0, AU.1 */
+    TBIDUAL DX[2];    /* DU.0, DU.4 */
+    int     CurrMODE;
+    int     CurrRPT;
+    int     CurrSTATUS;
+    void   *CurrPC;   /* PC in PC address space */
+} TBICTX2, *PTBICTX2;
+/* TBICTX2 is followed by:
+ *   TBICTXEXTCB0                if TXSTATUS.CBMarker
+ *   TBIDUAL * TXSTATUS.IRPCount if TXSTATUS.IRPCount > 0
+ *   TBICTXGP                    if using __TBIStdRootIntHandler or __TBIStdCtxSwitchRootIntHandler
+ */
+
+typedef struct _tbictxgp_tag_ {
+    short    DspramSizes;
+    short    SaveMask;
+    void    *pExt;
+    TBIDUAL  DX[6]; /* DU.1-DU.3, DU.5-DU.7 */
+    TBIDUAL  AX[2]; /* AU.2-AU.3 */
+} TBICTXGP, *PTBICTXGP;
+
+#define TBICTXGP_DspramSizes (0)
+#define TBICTXGP_SaveMask    (TBICTXGP_DspramSizes + 2)
+#define TBICTXGP_MAX_BYTES   (2 + 2 + 4 + 8*(6+2))
+
+#endif
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBICTX */
+#define TBICTX_Flags            (0)
+#define TBICTX_SaveMask         (2)
+#define TBICTX_CurrPC           (4)
+#define TBICTX_DX               (2 + 2 + 4)
+#define TBICTX_CurrRPT          (2 + 2 + 4 + 8 * 8)
+#define TBICTX_CurrMODE         (2 + 2 + 4 + 8 * 8 + 4 + 4)
+#define TBICTX_AX               (2 + 2 + 4 + 8 * 8 + 4 + 4 + 4 + 4)
+#define TBICTX_Ext              (2 + 2 + 4 + 8 * 8 + 4 + 4 + 4 + 4 + 2 * 8)
+#define TBICTX_Ext_AX2          (TBICTX_Ext + TBIEXTCTX_AX2)
+#define TBICTX_Ext_AX2_U0       (TBICTX_Ext + TBIEXTCTX_AX2 + TBIDUAL_U0)
+#define TBICTX_Ext_AX2_U1       (TBICTX_Ext + TBIEXTCTX_AX2 + TBIDUAL_U1)
+#define TBICTX_Ext_Ctx_pExt     (TBICTX_Ext + TBIEXTCTX_Ctx_pExt)
+#define TBICTX_Ext_Ctx_SaveMask (TBICTX_Ext + TBIEXTCTX_Ctx_SaveMask)
+
+#ifdef TBI_FASTINT_1_4
+#define TBICTX2_BYTES (8 * 2 + 8 * 2 + 4 + 4 + 4 + 4)
+#define TBICTXEXTCB0_BYTES (4 + 4 + 8)
+
+#define TBICTX2_CRIT_MAX_BYTES (TBICTX2_BYTES + TBICTXEXTCB0_BYTES + 6 * TBIDUAL_BYTES)
+#define TBI_SWITCH_NEXT_PC(PC, EXTRA) ((PC) + ((EXTRA) & 1 ? 8 : 4))
+#endif
+
+#ifndef __ASSEMBLY__
+/* Extended thread state save areas - catch buffer state element */
+typedef struct _tbictxextcb0_tag_ {
+    /* Flags data and address value - see METAC_CATCH_VALUES in machine.h */
+    unsigned long CBFlags, CBAddr;
+    /* 64-bit data */
+    TBIDUAL CBData;
+    
+} TBICTXEXTCB0, *PTBICTXEXTCB0;
+
+/* Read pipeline state saved on later cores after single catch buffer slot */
+typedef struct _tbictxextrp6_tag_ {
+    /* RPMask is TXSTATUS_RPMASK_BITS only, reserved is undefined */
+    unsigned long RPMask, Reserved0;
+    TBIDUAL CBData[6];
+    
+} TBICTXEXTRP6, *PTBICTXEXTRP6;
+
+/* Extended thread state save areas - 8 DU register pairs */
+typedef struct _tbictxextbb8_tag_ {
+    /* Remaining Data unit registers in 64-bit pairs */
+    TBIDUAL UX[8];
+    
+} TBICTXEXTBB8, *PTBICTXEXTBB8;
+
+/* Extended thread state save areas - 3 AU register pairs */
+typedef struct _tbictxextbb3_tag_ {
+    /* Remaining Address unit registers in 64-bit pairs */
+    TBIDUAL UX[3];
+    
+} TBICTXEXTBB3, *PTBICTXEXTBB3;
+
+/* Extended thread state save areas - 4 AU register pairs or 4 FX pairs */
+typedef struct _tbictxextbb4_tag_ {
+    /* Remaining Address unit or FPU registers in 64-bit pairs */
+    TBIDUAL UX[4];
+    
+} TBICTXEXTBB4, *PTBICTXEXTBB4;
+
+/* Extended thread state save areas - Hardware loop states (max 2) */
+typedef struct _tbictxexthl2_tag_ {
+    /* Hardware looping register states */
+    TBIDUAL Start, End, Count;
+    
+} TBICTXEXTHL2, *PTBICTXEXTHL2;
+
+/* Extended thread state save areas - DSP register states */
+typedef struct _tbictxexttdp_tag_ {
+    /* DSP 32-bit accumulator register state (Bits 31:0 of ACX.0) */
+    TBIDUAL Acc32[1];
+    /* DSP > 32-bit accumulator bits 63:32 of ACX.0 (zero-extended) */
+    TBIDUAL Acc64[1];
+    /* Twiddle register state, and three phase increment states */
+    TBIDUAL PReg[4];
+    /* Modulo region size, padded to 64-bits */
+    int CurrMRSIZE, Reserved0;
+    
+} TBICTXEXTTDP, *PTBICTXEXTTDP;
+
+/* Extended thread state save areas - DSP register states including DSP RAM */
+typedef struct _tbictxexttdpr_tag_ {
+    /* DSP 32-bit accumulator register state (Bits 31:0 of ACX.0) */
+    TBIDUAL Acc32[1];
+    /* DSP 40-bit accumulator register state (Bits 39:8 of ACX.0) */
+    TBIDUAL Acc40[1];
+    /* DSP RAM Pointers */
+    TBIDUAL RP0[2],  WP0[2],  RP1[2],  WP1[2];
+    /* DSP RAM Increments */
+    TBIDUAL RPI0[2], WPI0[2], RPI1[2], WPI1[2];
+    /* Template registers */
+    unsigned long Tmplt[16];
+    /* Modulo address region size and DSP RAM module region sizes */
+    int CurrMRSIZE, CurrDRSIZE;
+    
+} TBICTXEXTTDPR, *PTBICTXEXTTDPR;
+
+#ifdef TBI_1_4
+/* The METAC_ID_CORE register state is a marker for the FPU
+   state that is then stored after this core header structure.  */
+#define TBICTXEXTFPU_CONFIG_MASK  ( (METAC_COREID_NOFPACC_BIT+     \
+                                     METAC_COREID_CFGFPU_BITS ) << \
+                                     METAC_COREID_CONFIG_BITS       )
+
+/* Recorded FPU exception state from TXDEFR in DefrFpu */
+#define TBICTXEXTFPU_DEFRFPU_MASK (TXDEFR_FPU_FE_BITS)
+
+/* Extended thread state save areas - FPU register states */
+typedef struct _tbictxextfpu_tag_ {
+    /* Stored METAC_CORE_ID CONFIG */
+    int CfgFpu;
+    /* Stored deferred TXDEFR bits related to FPU
+     *
+     * This is encoded as follows in order to fit into 16-bits:
+     * DefrFPU:15 - 14 <= 0
+     *        :13 -  8 <= TXDEFR:21-16
+     *        : 7 -  6 <= 0
+     *        : 5 -  0 <= TXDEFR:5-0
+     */
+    short DefrFpu;
+
+    /* TXMODE bits related to FPU */
+    short ModeFpu;
+    
+    /* FPU Even/Odd register states */
+    TBIDUAL FX[4];
+   
+    /* if CfgFpu & TBICTX_CFGFPU_FX16_BIT  -> 1 then TBICTXEXTBB4 holds FX.8-15 */
+    /* if CfgFpu & TBICTX_CFGFPU_NOACF_BIT -> 0 then TBICTXEXTFPACC holds state */
+} TBICTXEXTFPU, *PTBICTXEXTFPU;
+
+/* Extended thread state save areas - FPU accumulator state */
+typedef struct _tbictxextfpacc_tag_ {
+    /* FPU accumulator register state - three 64-bit parts */
+    TBIDUAL FAcc32[3];
+    
+} TBICTXEXTFPACC, *PTBICTXEXTFPACC;
+#endif
+
+/* Prototype TBI structure */
+struct _tbi_tag_ ;
+
+/* A 64-bit return value used commonly in the TBI APIs */
+typedef union _tbires_tag_ {
+    /* Save and load this value to get/set the whole result quickly */
+    long long Val;
+
+    /* Parameter of a fnSigs or __TBICtx* call */
+    struct _tbires_sig_tag_ { 
+        /* TXMASK[I] bits zeroed up to and including current trigger level */
+        unsigned short TrigMask;
+        /* Control bits for handlers - see PTBIAPIFN documentation below */
+        unsigned short SaveMask;
+        /* Pointer to the base register context save area of the thread */
+        PTBICTX pCtx;
+    } Sig;
+
+    /* Result of TBIThrdPrivId call */
+    struct _tbires_thrdprivid_tag_ {
+        /* Basic thread identifier; just TBID_THREAD_BITS */
+        int Id;
+        /* Non thread-number bits; TBID_ISTAT_BIT+TBID_PSTAT_BIT */
+        int Priv;
+    } Thrd;
+
+    /* Parameter and Result of a __TBISwitch call */
+    struct _tbires_switch_tag_ { 
+        /* Parameter passed across context switch */
+        void *pPara;
+        /* Thread context of the other thread, including restore flags */
+        PTBICTX pCtx;
+    } Switch;
+    
+    /* For extended S/W events only */
+    struct _tbires_ccb_tag_ {
+        void *pCCB;
+        int COff;
+    } CCB;
+
+    struct _tbires_tlb_tag_ {
+        int Leaf;  /* TLB Leaf data */
+        int Flags; /* TLB Flags */
+    } Tlb;
+
+#ifdef TBI_FASTINT_1_4
+    struct _tbires_intr_tag_ {
+      short    TrigMask;
+      short    SaveMask;
+      PTBICTX2 pCtx;
+    } Intr;
+#endif
+
+} TBIRES, *PTBIRES;
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* Prototype for all signal handler functions, called via ___TBISyncTrigger or
+   ___TBIASyncTrigger.
+   
+   State.Sig.TrigMask will indicate the bits set within TXMASKI at
+          the time of the handler call that have all been cleared to prevent
+          nested interrupts occurring immediately.
+   
+   State.Sig.SaveMask is a bit-mask which will be set to zero when a trigger
+          occurs at background level, and to TBICTX_CRIT_BIT (optionally with
+          TBICTX_CBUF_BIT) when a trigger occurs at interrupt level.
+          
+          TBICTX_CBUF_BIT reflects the state of TXSTATUS_CBMARKER_BIT for
+          the interrupted background thread.
+   
+   State.Sig.pCtx will point at a TBICTX structure generated to hold the
+          critical state of the interrupted thread at interrupt level and
+          should be set to NULL when called at background level.
+        
+   Triggers will indicate the status of TXSTAT or TXSTATI sampled by the
+          code that called the handler.
+          
+   InstOrSWSId holds the actual SWITCH instruction detected ('Inst') if the
+          SigNum is TBID_SIGNUM_SWx, or the Id of the software signal
+          detected ('SWSId') if SigNum is TBID_SIGNUM_SWS; in all other
+          cases the value of this parameter is undefined.
+   
+   pTBI   points at the PTBI structure related to the thread and processing
+          level involved.
+
+   TBIRES return value at both processing levels is similar in terms of any
+          changes that the handler makes. By default the State argument value
+          passed in should be returned.
+          
+      Sig.TrigMask value is bits to OR back into TXMASKI when the handler
+          completes to enable currently disabled interrupts.
+          
+      Sig.SaveMask value is ignored.
+   
+      Sig.pCtx is ignored.
+
+ */
+typedef TBIRES (*PTBIAPIFN)( TBIRES State, int SigNum,
+                             int Triggers, int InstOrSWSId,
+                             volatile struct _tbi_tag_ *pTBI );
+#endif /* ifndef __ASSEMBLY__ */
+
+#ifndef __ASSEMBLY__
+/* The global memory map is described by a list of segment descriptors */
+typedef volatile struct _tbiseg_tag_ {
+    volatile struct _tbiseg_tag_ *pLink;
+    int Id;                           /* Id of the segment */
+    TBISPIN Lock;                     /* Spin-lock for struct (normally 0) */
+    unsigned int Bytes;               /* Size of region in bytes */
+    void *pGAddr;                     /* Base addr of region in global space */
+    void *pLAddr;                     /* Base addr of region in local space */
+    int Data[2];                      /* Segment specific data (may be extended) */
+
+} TBISEG, *PTBISEG;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Offsets of fields in TBISEG structure */
+#define TBISEG_pLink    ( 0)
+#define TBISEG_Id       ( 4)
+#define TBISEG_Lock     ( 8)
+#define TBISEG_Bytes    (12)
+#define TBISEG_pGAddr   (16)
+#define TBISEG_pLAddr   (20)
+#define TBISEG_Data     (24)
+
+#ifndef __ASSEMBLY__
+typedef volatile struct _tbi_tag_ {
+    int SigMask;                      /* Bits set to represent S/W events */
+    PTBIKICK pKick;                   /* Kick addr for S/W events */
+    void *pCCB;                       /* Extended S/W events */
+    PTBISEG pSeg;                     /* Related segment structure */
+    PTBIAPIFN fnSigs[TBID_SIGNUM_MAX+1];/* Signal handler API table */
+} *PTBI, TBI;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Byte offsets of fields within TBI */
+#define TBI_SigMask     (0)
+#define TBI_pKick       (4)
+#define TBI_pCCB        (8)
+#define TBI_pSeg       (12)
+#define TBI_fnSigs     (16)
+
+#ifdef TBI_1_4
+#ifndef __ASSEMBLY__
+/* This handler should be used for TBID_SIGNUM_DFR */
+extern TBIRES __TBIHandleDFR ( TBIRES State, int SigNum,
+                               int Triggers, int InstOrSWSId,
+                               volatile struct _tbi_tag_ *pTBI );
+#endif
+#endif
+
+/* String table entry - special values */
+#define METAG_TBI_STRS (0x5300) /* Tag      : If entry is valid */
+#define METAG_TBI_STRE (0x4500) /* Tag      : If entry is end of table */
+#define METAG_TBI_STRG (0x4700) /* Tag      : If entry is a gap */
+#define METAG_TBI_STRX (0x5A00) /* TransLen : If no translation present */
+
+#ifndef __ASSEMBLY__
+typedef volatile struct _tbistr_tag_ {
+    short Bytes;                      /* Length of entry in Bytes */
+    short Tag;                        /* Normally METAG_TBI_STRS(0x5300) */
+    short Len;                        /* Length of the string entry (incl null) */
+    short TransLen;                   /* Normally METAG_TBI_STRX(0x5A00) */
+    char String[8];                   /* Zero terminated (may be bigger) */
+
+} TBISTR, *PTBISTR;
+#endif /* ifndef __ASSEMBLY__ */
+
+/* Cache size information - available as fields of Data[1] of global heap
+   segment */
+#define METAG_TBI_ICACHE_SIZE_S    0             /* see comments below */
+#define METAG_TBI_ICACHE_SIZE_BITS 0x0000000F
+#define METAG_TBI_ICACHE_FILL_S    4
+#define METAG_TBI_ICACHE_FILL_BITS 0x000000F0
+#define METAG_TBI_DCACHE_SIZE_S    8
+#define METAG_TBI_DCACHE_SIZE_BITS 0x00000F00
+#define METAG_TBI_DCACHE_FILL_S    12
+#define METAG_TBI_DCACHE_FILL_BITS 0x0000F000
+
+/* METAG_TBI_xCACHE_SIZE
+   Describes the physical cache size rounded up to the next power of 2
+   relative to a 16K (2^14) cache. These sizes are encoded as a signed addend
+   to this base power of 2, for example
+      4K -> 2^12 -> -2  (i.e. 12-14)
+      8K -> 2^13 -> -1
+     16K -> 2^14 ->  0
+     32K -> 2^15 -> +1
+     64K -> 2^16 -> +2
+    128K -> 2^17 -> +3
+
+   METAG_TBI_xCACHE_FILL
+   Describes the physical cache size within the power of 2 area given by
+   the value above. For example a 10K cache may be represented as having
+   nearest size 16K with a fill of 10 sixteenths. This is encoded as the
+   number of unused 1/16ths, for example
+     0000 ->  0 -> 16/16
+     0001 ->  1 -> 15/16
+     0010 ->  2 -> 14/16
+     ...
+     1111 -> 15 ->  1/16
+ */
+
+#define METAG_TBI_CACHE_SIZE_BASE_LOG2 14
+
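To make the encoding above concrete, here is a small illustrative sketch (not part of the patch) that decodes the instruction cache size in bytes from the Data[1] word of the global heap segment; the function name and the assumption that heap_data1 already holds Data[1] are hypothetical.

    /* Illustrative sketch only: decode the I-cache SIZE/FILL fields of Data[1]. */
    static unsigned int icache_size_bytes(int heap_data1)
    {
        /* 4-bit signed addend to the 16K (2^14) base size */
        int size = (heap_data1 & METAG_TBI_ICACHE_SIZE_BITS)
                        >> METAG_TBI_ICACHE_SIZE_S;
        /* number of unused 1/16ths of that power-of-2 size */
        int fill = (heap_data1 & METAG_TBI_ICACHE_FILL_BITS)
                        >> METAG_TBI_ICACHE_FILL_S;
        unsigned int pow2;

        if (size > 7)
            size -= 16;        /* sign-extend the 4-bit field */
        pow2 = 1u << (METAG_TBI_CACHE_SIZE_BASE_LOG2 + size);
        return pow2 - (pow2 / 16) * fill;
    }

For example, a 10K cache encoded as SIZE=0 and FILL=6 decodes back to 16384 - 6*1024 = 10240 bytes.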
+/* Each declaration made by this macro generates a TBISTR entry */
+#ifndef __ASSEMBLY__
+#define TBISTR_DECL( Name, Str )                                       \
+    __attribute__ ((__section__ (".tbistr") )) const char Name[] = #Str
+#endif
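A minimal, hypothetical usage sketch of the macro above (the entry name and text are made up); it places a zero-terminated string into the .tbistr section.

    /* Hypothetical string table entry emitted into the .tbistr section */
    TBISTR_DECL(example_board_name, Example Meta board);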
+
+/* META timer values - see below for Timer support routines */
+#define TBI_TIMERWAIT_MIN (-16)         /* Minimum 'recommended' period */
+#define TBI_TIMERWAIT_MAX (-0x7FFFFFFF) /* Maximum 'recommended' period */
+
+#ifndef __ASSEMBLY__
+/* These macros allow direct access from C to any register known to the
+   assembler or defined in machine.h. Example candidates are TXTACTCYC,
+   TXIDLECYC, and TXPRIVEXT. Note that higher level macros and routines,
+   such as the timer and trigger handling features below, should be used in
+   preference to this direct low-level access mechanism where they exist. */
+#define TBI_GETREG( Reg )                                  __extension__ ({\
+   int __GRValue;                                                          \
+   __asm__ volatile ("MOV\t%0," #Reg "\t/* (*TBI_GETREG OK) */" :          \
+                     "=r" (__GRValue) );                                   \
+    __GRValue;                                                            })
+
+#define TBI_SETREG( Reg, Value )                                       do {\
+   int __SRValue = Value;                                                  \
+   __asm__ volatile ("MOV\t" #Reg ",%0\t/* (*TBI_SETREG OK) */" :          \
+                     : "r" (__SRValue) );                       } while (0)
+
+#define TBI_SWAPREG( Reg, Value )                                      do {\
+   int __XRValue = (Value);                                                \
+   __asm__ volatile ("SWAP\t" #Reg ",%0\t/* (*TBI_SWAPREG OK) */" :        \
+                     "=r" (__XRValue) : "0" (__XRValue) );                 \
+   Value = __XRValue;                                           } while (0)
+
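A hedged usage sketch of the register access macros above; TXTACTCYC is one of the example candidates named in the comment, and the measured work is a placeholder.

    /* Sketch: measure active cycles spent in a block of code */
    static int measure_active_cycles(void (*work)(void))
    {
        int before = TBI_GETREG(TXTACTCYC);

        work();                               /* code being measured */
        return TBI_GETREG(TXTACTCYC) - before;
    }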
+/* Obtain and/or release global critical section lock given that interrupts
+   are already disabled and/or should remain disabled. */
+#define TBI_NOINTSCRITON                                             do {\
+   __asm__ volatile ("LOCK1\t\t/* (*TBI_NOINTSCRITON OK) */");} while (0)
+#define TBI_NOINTSCRITOFF                                             do {\
+   __asm__ volatile ("LOCK0\t\t/* (*TBI_NOINTSCRITOFF OK) */");} while (0)
+/* Optimised in-lining versions of the above macros */
+
+#define TBI_LOCK( TrigState )                                          do {\
+   int __TRValue;                                                          \
+   int __ALOCKHI = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000;                \
+   __asm__ volatile ("MOV %0,#0\t\t/* (*TBI_LOCK ... */\n\t"               \
+                     "SWAP\t%0,TXMASKI\t/* ... */\n\t"                     \
+                     "LOCK2\t\t/* ... */\n\t"                              \
+                     "SETD\t[%1+#0x40],D1RtP /* ... OK) */" :              \
+                     "=r&" (__TRValue) : "u" (__ALOCKHI) );                \
+   TrigState = __TRValue;                                       } while (0)
+#define TBI_CRITON( TrigState )                                        do {\
+   int __TRValue;                                                          \
+   __asm__ volatile ("MOV %0,#0\t\t/* (*TBI_CRITON ... */\n\t"             \
+                     "SWAP\t%0,TXMASKI\t/* ... */\n\t"                     \
+                     "LOCK1\t\t/* ... OK) */" :                            \
+                     "=r" (__TRValue) );                                   \
+   TrigState = __TRValue;                                       } while (0)
+
+#define TBI_INTSX( TrigState )                                         do {\
+   int __TRValue = TrigState;                                              \
+   __asm__ volatile ("SWAP\t%0,TXMASKI\t/* (*TBI_INTSX OK) */" :           \
+                     "=r" (__TRValue) : "0" (__TRValue) );                 \
+   TrigState = __TRValue;                                       } while (0)
+
+#define TBI_UNLOCK( TrigState )                                        do {\
+   int __TRValue = TrigState;                                              \
+   int __ALOCKHI = LINSYSEVENT_WR_ATOMIC_LOCK & 0xFFFF0000;                \
+   __asm__ volatile ("SETD\t[%1+#0x00],D1RtP\t/* (*TBI_UNLOCK ... */\n\t"  \
+                     "LOCK0\t\t/* ... */\n\t"                              \
+                     "MOV\tTXMASKI,%0\t/* ... OK) */" :                    \
+                     : "r" (__TRValue), "u" (__ALOCKHI) );      } while (0)
+
+#define TBI_CRITOFF( TrigState )                                       do {\
+   int __TRValue = TrigState;                                              \
+   __asm__ volatile ("LOCK0\t\t/* (*TBI_CRITOFF ... */\n\t"                \
+                     "MOV\tTXMASKI,%0\t/* ... OK) */" :                    \
+                     : "r" (__TRValue) );                       } while (0)
+
+#define TBI_TRIGSX( SrcDst ) do { TBI_SWAPREG( TXMASK, SrcDst );} while (0)
+
+/* Composite macros to perform logic ops on INTS or TRIGS masks */
+#define TBI_INTSOR( Bits )                                              do {\
+    int __TT = 0; TBI_INTSX(__TT);                                          \
+    __TT |= (Bits); TBI_INTSX(__TT);                             } while (0)
+    
+#define TBI_INTSAND( Bits )                                             do {\
+    int __TT = 0; TBI_INTSX(__TT);                                          \
+    __TT &= (Bits); TBI_INTSX(__TT);                             } while (0)
+
+#ifdef TBI_1_4
+#define TBI_DEFRICTRLSOR( Bits )                                        do {\
+    int __TT = TBI_GETREG( CT.20 );                                         \
+    __TT |= (Bits); TBI_SETREG( CT.20, __TT);                    } while (0)
+    
+#define TBI_DEFRICTRLSAND( Bits )                                       do {\
+    int __TT = TBI_GETREG( TXDEFR );                                        \
+    __TT &= (Bits); TBI_SETREG( CT.20, __TT);                    } while (0)
+#endif
+
+#define TBI_TRIGSOR( Bits )                                             do {\
+    int __TT = TBI_GETREG( TXMASK );                                        \
+    __TT |= (Bits); TBI_SETREG( TXMASK, __TT);                   } while (0)
+    
+#define TBI_TRIGSAND( Bits )                                            do {\
+    int __TT = TBI_GETREG( TXMASK );                                        \
+    __TT &= (Bits); TBI_SETREG( TXMASK, __TT);                   } while (0)
+
+/* Macros to disable and re-enable interrupts using TBI_INTSX, deliberate
+   traps and exceptions can still be handled within the critical section. */
+#define TBI_STOPINTS( Value )                                           do {\
+    int __TT = TBI_GETREG( TXMASKI );                                       \
+    __TT &= TXSTATI_BGNDHALT_BIT; TBI_INTSX( __TT );                        \
+    Value = __TT;                                                } while (0)
+#define TBI_RESTINTS( Value )                                           do {\
+    int __TT = Value; TBI_INTSX( __TT );                         } while (0)
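An illustrative sketch of the interrupt masking macros above; the shared counter and the wrapper function are hypothetical.

    /* Sketch: update shared data with interrupts masked via TBI_STOPINTS */
    static void update_shared_counter(volatile int *counter)
    {
        int saved;

        TBI_STOPINTS(saved);   /* mask interrupts, remember previous TXMASKI */
        (*counter)++;          /* work protected from interrupt handlers */
        TBI_RESTINTS(saved);   /* restore the previous interrupt mask */
    }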
+
+/* Return pointer to segment list at current privilege level */
+PTBISEG __TBISegList( void );
+
+/* Search the segment list for a match given Id, pStart can be NULL */
+PTBISEG __TBIFindSeg( PTBISEG pStart, int Id );
+
+/* Prepare a new segment structure using space from within another */
+PTBISEG __TBINewSeg( PTBISEG pFromSeg, int Id, unsigned int Bytes );
+
+/* Prepare a new segment using any global or local heap segments available */
+PTBISEG __TBIMakeNewSeg( int Id, unsigned int Bytes );
+
+/* Insert a new segment into the segment list so __TBIFindSeg can locate it */
+void __TBIAddSeg( PTBISEG pSeg );
+#define __TBIADDSEG_DEF     /* Some versions failed to define this */
+
+/* Return Id of current thread; TBID_ISTAT_BIT+TBID_THREAD_BITS */
+int __TBIThreadId( void );
+
+/* Return TBIRES.Thrd data for current thread */
+TBIRES __TBIThrdPrivId( void );
+
+/* Return pointer to the current thread's TBI root block.
+   Id implies whether the Int or Background root block is required */
+PTBI __TBI( int Id );
+
+/* Try to set Mask bit using the spin-lock protocol; return 0 if it fails and
+   the new state if it succeeds */
+int __TBIPoll( PTBISPIN pLock, int Mask );
+
+/* Set Mask bits via the spin-lock protocol in *pLock, return new state */
+int __TBISpin( PTBISPIN pLock, int Mask );
+
+/* Default handler set up for all TBI.fnSigs entries during initialisation */
+TBIRES __TBIUnExpXXX( TBIRES State, int SigNum,
+                   int Triggers, int Inst, PTBI pTBI );
+
+/* Call this routine to service triggers at background processing level. The
+   TBID_POLL_BIT of the Id parameter value will be used to indicate that the
+   routine should return if no triggers need to be serviced initially. If this
+   bit is not set the routine will block until one trigger handler is serviced
+   and then behave like the poll case servicing any remaining triggers
+   actually outstanding before returning. Normally the State parameter should
+   be simply initialised to zero and the result should be ignored, other
+   values/options are for internal use only. */
+TBIRES __TBISyncTrigger( TBIRES State, int Id );
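A hedged sketch of the polling pattern described above; composing the Id from __TBIThreadId() plus TBID_POLL_BIT is an assumption about typical use, and the wrapper function is hypothetical.

    /* Sketch: service any outstanding background triggers without blocking */
    static void poll_background_triggers(void)
    {
        TBIRES state;

        state.Val = 0;   /* normal initialisation; result is ignored */
        (void)__TBISyncTrigger(state, __TBIThreadId() | TBID_POLL_BIT);
    }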
+
+/* Call this routine to enable processing of triggers by signal handlers at
+   interrupt level. The State parameter value passed is returned by this
+   routine. The State.Sig.TrigMask field also specifies the initial
+   state of the interrupt mask register TXMASKI to be setup by the call.
+   The other parts of the State parameter are ignored unless the PRIV bit is
+   set in the SaveMask field. In this case the State.Sig.pCtx field specifies
+   the base of the stack to which the interrupt system should switch
+   as it saves the state of the previously executing code. In this case the
+   thread will be unprivileged as it continues execution at the return
+   point of this routine and its future state will effectively never be
+   trusted to be valid. */
+TBIRES __TBIASyncTrigger( TBIRES State );
+
+/* Call this to swap soft threads executing at the background processing level.
+   The TBIRES returned to the new thread will be the same as the NextThread
+   value specified to the call. The NextThread.Switch.pCtx value specifies
+   which thread context to restore and the NextThread.Switch.pPara value can
+   hold an arbitrary expression to be passed between the threads. The saved
+   state of the previous thread will be stored in a TBICTX descriptor created
+   on its stack and the address of this will be stored into the *rpSaveCtx
+   location specified. */
+TBIRES __TBISwitch( TBIRES NextThread, PTBICTX *rpSaveCtx );
+
+/* Call this to initialise a stack frame ready for further use, up to four
+   32-bit arguments may be specified after the fixed args to be passed via
+   the new stack pStack to the routine specified via fnMain. If the
+   main-line routine ever returns the thread will operate as if main itself
+   had returned and terminate with the return code given. */
+typedef int (*PTBIMAINFN)( TBIRES Arg /*, <= 4 additional 32-bit args */ );
+PTBICTX __TBISwitchInit( void *pStack, PTBIMAINFN fnMain, ... );
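A hedged sketch of creating and entering a soft thread with the two routines above; the stack array, its assumed upward growth from its base address, the worker body, and the parameter value are all illustrative.

    /* Sketch: create a soft thread on a private stack and switch to it */
    static unsigned long worker_stack[1024];   /* assumed to grow up from its base */

    static int worker_main(TBIRES arg)
    {
        /* ... soft thread body; returning here terminates the thread ... */
        return 0;
    }

    static void start_worker(void)
    {
        PTBICTX saved;
        TBIRES next;

        next.Switch.pCtx = __TBISwitchInit(worker_stack, worker_main);
        next.Switch.pPara = NULL;          /* value handed across the switch */
        (void)__TBISwitch(next, &saved);   /* returns when something switches back */
    }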
+
+/* Call this to resume a thread from a saved synchronous TBICTX state.
+   The TBIRES returned to the new thread will be the same as the NextThread
+   value specified to the call. The NextThread.Switch.pCtx value specifies
+   which thread context to restore and the NextThread.Switch.pPara value can
+   hold an arbitrary expression to be passed between the threads. The context
+   of the calling thread is lost and this routine never returns to the
+   caller. The TrigsMask value supplied is ORed into TXMASKI to enable
+   interrupts after the context of the new thread is established. */
+void __TBISyncResume( TBIRES NextThread, int TrigsMask );
+
+/* Call these routines to save and restore the extended states of
+   scheduled tasks. */
+void *__TBICtxSave( TBIRES State, void *pExt );
+void *__TBICtxRestore( TBIRES State, void *pExt );
+
+#ifdef TBI_1_4
+#ifdef TBI_FASTINT_1_4
+/* Call these routines to copy the GP state to a separate buffer
+ * Only necessary for context switching.
+ */
+PTBICTXGP __TBICtx2SaveCrit( PTBICTX2 pCurrentCtx, PTBICTX2 pSaveCtx );
+void *__TBICtx2SaveGP( PTBICTXGP pCurrentCtxGP, PTBICTXGP pSaveCtxGP );
+
+/* Call these routines to save and restore the extended states of
+   scheduled tasks. */
+void *__TBICtx2Save( PTBICTXGP pCtxGP, short SaveMask, void *pExt );
+void *__TBICtx2Restore( PTBICTX2 pCtx, short SaveMask, void *pExt );
+#endif
+
+/* If the FPAC flag is set then significant FPU context exists. Call these
+   routines to save and restore it */
+void *__TBICtxFPUSave( TBIRES State, void *pExt );
+void *__TBICtxFPURestore( TBIRES State, void *pExt );
+
+#ifdef TBI_FASTINT_1_4
+extern void *__TBICtx2FPUSave (PTBICTXGP, short, void*);
+extern void *__TBICtx2FPURestore (PTBICTXGP, short, void*);
+#endif
+#endif
+
+#ifdef TBI_1_4
+/* Call these routines to save and restore DSPRAM. */
+void *__TBIDspramSaveA (short DspramSizes, void *pExt);
+void *__TBIDspramSaveB (short DspramSizes, void *pExt);
+void *__TBIDspramRestoreA (short DspramSizes, void *pExt);
+void *__TBIDspramRestoreB (short DspramSizes, void *pExt);
+#endif
+
+/* This routine should be used at the entrypoint of interrupt handlers to
+   re-enable higher priority interrupts and/or save state from the previously
+   executing background code. State is a TBIRES.Sig parameter with NoNestMask
+   indicating the triggers (if any) that should remain disabled and the
+   SaveMask CBUF bit indicating if the hardware catch buffer is dirty. Optionally
+   any number of extended state bits X??? including XCBF can be specified to
+   force a nested state save call to __TBICtxSave before the current routine
+   continues. (In the latter case __TBICtxRestore should be called to restore
+   any extended states before the background thread of execution is resumed) 
+   
+   By default (no X??? bits specified in SaveMask) this routine performs a
+   sub-call to __TBICtxSave with the pExt and State parameters specified IF
+   some triggers could be serviced while the current interrupt handler
+   executes and the hardware catch buffer is actually dirty. In this case
+   this routine provides the XCBF bit in State.Sig.SaveMask to force the
+   __TBICtxSave to extract the current catch state.
+   
+   The NoNestMask parameter should normally indicate that the same or lower
+   triggers than those provoking the current handler call should not be
+   serviced in nested calls, zero may be specified if all possible interrupts
+   are to be allowed.
+   
+   The TBIRES.Sig value returned will be similar to the State parameter
+   specified, with the XCBF bit ORed into its SaveMask if a context save was
+   required and fewer bits set in its TrigMask corresponding to the same/lower
+   priority interrupt triggers still not enabled. */
+TBIRES __TBINestInts( TBIRES State, void *pExt, int NoNestMask );
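A hedged sketch of the entry pattern described above for an interrupt-level signal handler; the extended-state buffer size and the mapping of SigNum to a TXMASKI bit in NoNestMask are assumptions made for illustration.

    /* Sketch: re-enable higher priority interrupts at handler entry */
    static unsigned char irq_ext_state[64];   /* assumed large enough for __TBICtxSave */

    static TBIRES example_irq_handler(TBIRES State, int SigNum, int Triggers,
                                      int InstOrSWSId,
                                      volatile struct _tbi_tag_ *pTBI)
    {
        /* keep the trigger that invoked us disabled while nested (assumed bit mapping) */
        State = __TBINestInts(State, irq_ext_state, 1 << SigNum);

        /* ... service the hardware that raised the trigger ... */

        return State;   /* TrigMask/SaveMask handed back as described above */
    }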
+
+/* This routine causes the TBICTX structure specified in State.Sig.pCtx to
+   be restored. This implies that execution will not return to the caller.
+   The State.Sig.TrigMask field will be restored during the context switch
+   such that any immediately occurring interrupts occur in the context of the
+   newly specified task. The State.Sig.SaveMask parameter is ignored. */
+void __TBIASyncResume( TBIRES State );
+
+/* Call this routine to enable fastest possible processing of one or more
+   interrupt triggers via a unified signal handler. The handler concerned
+   must simply return after servicing the related hardware.
+   The State.Sig.TrigMask parameter indicates the interrupt triggers to be
+   enabled and the Thin.Thin.fnHandler specifies the routine to call;
+   the whole Thin parameter value will be passed to this routine unaltered as
+   its first parameter. */
+void __TBIASyncThin( TBIRES State, TBIRES Thin );
+
+/* Do this before performing your own direct spin-lock access - use TBI_LOCK */
+int __TBILock( void );
+
+/* Do this after performing your own direct spin-lock access - use TBI_UNLOCK */
+void __TBIUnlock( int TrigState );
+
+/* Obtain and release global critical section lock - only stops execution
+   of interrupts on this thread and similar critical section code on other
+   local threads - use TBI_CRITON or TBI_CRITOFF */
+int __TBICritOn( void );
+void __TBICritOff( int TrigState );
+
+/* Change INTS (TXMASKI) - return old state - use TBI_INTSX */
+int __TBIIntsX( int NewMask );
+
+/* Change TRIGS (TXMASK) - return old state - use TBI_TRIGSX */
+int __TBITrigsX( int NewMask );
+
+/* This function initialises a timer for first use; only the TBID_ISTAT_BIT
+   of the Id parameter is used to indicate which timer is to be modified. The
+   Wait value should either be zero to disable the timer concerned or be in
+   the recommended TBI_TIMERWAIT_* range to specify the delay required before
+   the first timer trigger occurs.
+
+   The TBID_ISTAT_BIT of the Id parameter similarly affects all other timer
+   support functions (see below). */
+void __TBITimerCtrl( int Id, int Wait );
+
+/* This routine returns a 64-bit time stamp value that is initialised to zero
+   via a __TBITimerCtrl timer enabling call. */
+long long __TBITimeStamp( int Id );
+
+/* To manage a periodic timer each period elapsed should be subtracted from
+   the current timer value to attempt to set up the next timer trigger. The
+   Wait parameter should be a value in the recommended TBI_TIMERWAIT_* range.
+   The return value is the new aggregate value that the timer was updated to;
+   if this is less than zero then a timer trigger is guaranteed to be
+   generated after the number of ticks implied, whereas if a positive result is
+   returned either iterative or step-wise corrective action must be taken to
+   resynchronise the timer and hence provoke a future timer trigger. */
+int __TBITimerAdd( int Id, int Wait );
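A hedged sketch of the periodic timer pattern built from the two routines above; the period value and the split between setup code and trigger-handler code are illustrative.

    /* Sketch: arm a periodic timer and re-arm it from its trigger handler */
    #define EXAMPLE_PERIOD (-1000)   /* hypothetical tick count in the recommended range */

    static void example_timer_start(int id)
    {
        __TBITimerCtrl(id, EXAMPLE_PERIOD);   /* first trigger after the period */
    }

    static void example_timer_tick(int id)
    {
        if (__TBITimerAdd(id, EXAMPLE_PERIOD) >= 0) {
            /* fell behind: take corrective action as described above */
        }
    }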
+
+/* String table search function, pStart is first entry to check or NULL,
+   pStr is string data to search for and MatchLen is either length of string
+   to compare for an exact match or negative length to compare for partial
+   match. */
+const TBISTR *__TBIFindStr( const TBISTR *pStart,
+                            const char *pStr, int MatchLen );
+
+/* String table translate function, pStr is text to translate and Len is
+   its length. The value returned may not be a string pointer if the
+   translation value is really some other type, 64-bit alignment of the return
+   pointer is guaranteed so almost any type including a structure could be
+   located with this routine. */ 
+const void *__TBITransStr( const char *pStr, int Len );
+
+
+
+/* Arbitrary physical memory access windows, use different Channels to avoid
+   conflict/thrashing within a single piece of code. */
+void *__TBIPhysAccess( int Channel, int PhysAddr, int Bytes );
+void __TBIPhysRelease( int Channel, void *pLinAddr );
+
+#ifdef METAC_1_0
+/* Data cache function nullified because data cache is off */
+#define TBIDCACHE_FLUSH( pAddr )
+#define TBIDCACHE_PRELOAD( Type, pAddr ) ((Type) (pAddr))
+#define TBIDCACHE_REFRESH( Type, pAddr ) ((Type) (pAddr))
+#endif
+#ifdef METAC_1_1
+/* To flush a single cache line from the data cache using a linear address */
+#define TBIDCACHE_FLUSH( pAddr )          ((volatile char *) \
+                 (((unsigned int) (pAddr))>>LINSYSLFLUSH_S))[0] = 0
+
+extern void * __builtin_dcache_preload (void *);
+
+/* Try to ensure that the data at the address concerned is in the cache */
+#define TBIDCACHE_PRELOAD( Type, Addr )                                    \
+  ((Type) __builtin_dcache_preload ((void *)(Addr)))
+
+extern void * __builtin_dcache_refresh (void *);
+
+/* Flush any old version of data from address and re-load a new copy */
+#define TBIDCACHE_REFRESH( Type, Addr )                   __extension__ ({ \
+  Type __addr = (Type)(Addr);                                              \
+  (void)__builtin_dcache_refresh ((void *)(((unsigned int)(__addr))>>6));  \
+  __addr; })
+
+#endif
+#ifndef METAC_1_0
+#ifndef METAC_1_1
+/* Support for DCACHE builtin */
+extern void __builtin_dcache_flush (void *);
+
+/* To flush a single cache line from the data cache using a linear address */
+#define TBIDCACHE_FLUSH( Addr )                                            \
+  __builtin_dcache_flush ((void *)(Addr))
+
+extern void * __builtin_dcache_preload (void *);
+
+/* Try to ensure that the data at the address concerned is in the cache */
+#define TBIDCACHE_PRELOAD( Type, Addr )                                    \
+  ((Type) __builtin_dcache_preload ((void *)(Addr)))
+
+extern void * __builtin_dcache_refresh (void *);
+
+/* Flush any old version of data from address and re-load a new copy */
+#define TBIDCACHE_REFRESH( Type, Addr )                                    \
+  ((Type) __builtin_dcache_refresh ((void *)(Addr)))
+
+#endif
+#endif
+
+/* Flush the MMCU cache */
+#define TBIMCACHE_FLUSH() { ((volatile int *) LINSYSCFLUSH_MMCU)[0] = 0; }
+
+#ifdef METAC_2_1
+/* Obtain the MMU table entry for the specified address */
+#define TBIMTABLE_LEAFDATA(ADDR) TBIXCACHE_RD((int)(ADDR) & (-1<<6))
+
+#ifndef __ASSEMBLY__
+/* Obtain the full MMU table entry for the specified address */
+#define TBIMTABLE_DATA(ADDR) __extension__ ({ TBIRES __p;                     \
+                                              __p.Val = TBIXCACHE_RL((int)(ADDR) & (-1<<6));   \
+                                              __p; })
+#endif
+#endif
+
+/* Combine a physical base address, and a linear address
+ * Internal use only
+ */
+#define _TBIMTABLE_LIN2PHYS(PHYS, LIN, LMASK) (void*)(((int)(PHYS)&0xFFFFF000)\
+                                               +((int)(LIN)&(LMASK)))
+
+/* Convert a linear to a physical address */
+#define TBIMTABLE_LIN2PHYS(LEAFDATA, ADDR)                                    \
+          (((LEAFDATA) & CRLINPHY0_VAL_BIT)                                   \
+              ? _TBIMTABLE_LIN2PHYS(LEAFDATA, ADDR, 0x00000FFF)               \
+              : 0)
+
+/* Debug support - using external debugger or host */
+void __TBIDumpSegListEntries( void );
+void __TBILogF( const char *pFmt, ... );
+void __TBIAssert( const char *pFile, int LineNum, const char *pExp );
+void __TBICont( const char *pMsg, ... ); /* TBIAssert -> 'wait for continue' */
+
+/* Array of signal name data for debug messages */
+extern const char __TBISigNames[];
+#endif /* ifndef __ASSEMBLY__ */
+
+
+
+/* Scale of sub-strings in the __TBISigNames string list */
+#define TBI_SIGNAME_SCALE   4
+#define TBI_SIGNAME_SCALE_S 2
+
+#define TBI_1_3 
+
+#ifdef TBI_1_3
+
+#ifndef __ASSEMBLY__
+#define TBIXCACHE_RD(ADDR)                                 __extension__ ({\
+    void * __Addr = (void *)(ADDR);                                        \
+    int __Data;                                                            \
+    __asm__ volatile ( "CACHERD\t%0,[%1+#0]" :                             \
+                       "=r" (__Data) : "r" (__Addr) );                     \
+    __Data;                                                               })
+
+#define TBIXCACHE_RL(ADDR)                                 __extension__ ({\
+    void * __Addr = (void *)(ADDR);                                        \
+    long long __Data;                                                      \
+    __asm__ volatile ( "CACHERL\t%0,%t0,[%1+#0]" :                         \
+                       "=d" (__Data) : "r" (__Addr) );                     \
+    __Data;                                                               })
+
+#define TBIXCACHE_WD(ADDR, DATA)                                      do {\
+    void * __Addr = (void *)(ADDR);                                       \
+    int __Data = DATA;                                                    \
+    __asm__ volatile ( "CACHEWD\t[%0+#0],%1" :                            \
+                       : "r" (__Addr), "r" (__Data) );          } while(0)
+
+#define TBIXCACHE_WL(ADDR, DATA)                                      do {\
+    void * __Addr = (void *)(ADDR);                                       \
+    long long __Data = DATA;                                              \
+    __asm__ volatile ( "CACHEWL\t[%0+#0],%1,%t1" :                        \
+                       : "r" (__Addr), "r" (__Data) );          } while(0)
+
+#ifdef TBI_4_0
+
+#define TBICACHE_FLUSH_L1D_L2(ADDR)                                       \
+  TBIXCACHE_WD(ADDR, CACHEW_FLUSH_L1D_L2)
+#define TBICACHE_WRITEBACK_L1D_L2(ADDR)                                   \
+  TBIXCACHE_WD(ADDR, CACHEW_WRITEBACK_L1D_L2)
+#define TBICACHE_INVALIDATE_L1D(ADDR)                                     \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1D)
+#define TBICACHE_INVALIDATE_L1D_L2(ADDR)                                  \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1D_L2)
+#define TBICACHE_INVALIDATE_L1DTLB(ADDR)                                  \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1DTLB)
+#define TBICACHE_INVALIDATE_L1I(ADDR)                                     \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1I)
+#define TBICACHE_INVALIDATE_L1ITLB(ADDR)                                  \
+  TBIXCACHE_WD(ADDR, CACHEW_INVALIDATE_L1ITLB)
+
+#endif /* TBI_4_0 */
+#endif /* ifndef __ASSEMBLY__ */
+
+/*
+ * Calculate linear PC value from real PC and Minim mode control; the LSB of
+ * the result returned indicates if address compression has occurred.
+ */
+#ifndef __ASSEMBLY__
+#define METAG_LINPC( PCVal )                                              (\
+    ( (TBI_GETREG(TXPRIVEXT) & TXPRIVEXT_MINIMON_BIT) != 0 ) ?           ( \
+        ( ((PCVal) & 0x00900000) == 0x00900000 ) ?                         \
+          (((PCVal) & 0xFFE00000) + (((PCVal) & 0x001FFFFC)>>1) + 1) :     \
+        ( ((PCVal) & 0x00800000) == 0x00000000 ) ?                         \
+          (((PCVal) & 0xFF800000) + (((PCVal) & 0x007FFFFC)>>1) + 1) :     \
+                                                             (PCVal)   )   \
+                                                                 : (PCVal) )
+#define METAG_LINPC_X2BIT 0x00000001       /* Make (Size>>1) if compressed */
+
+/* Convert an arbitrary Linear address into a valid Minim PC or return 0 */
+#define METAG_PCMINIM( LinVal )                                           (\
+        (((LinVal) & 0x00980000) == 0x00880000) ?                          \
+            (((LinVal) & 0xFFE00000) + (((LinVal) & 0x000FFFFE)<<1)) :     \
+        (((LinVal) & 0x00C00000) == 0x00000000) ?                          \
+            (((LinVal) & 0xFF800000) + (((LinVal) & 0x003FFFFE)<<1)) : 0   )
+
+/* Reverse a METAG_LINPC conversion step to return the original PCVal */
+#define METAG_PCLIN( LinVal )                              ( 0xFFFFFFFC & (\
+        ( (LinVal & METAG_LINPC_X2BIT) != 0 ) ? METAG_PCMINIM( LinVal ) :  \
+                                                               (LinVal)   ))
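A small hedged sketch of using the conversion macros above; the predicate name is hypothetical.

    /* Sketch: test whether a PC value refers to compressed (Minim) code */
    static int pc_is_minim(unsigned int pc)
    {
        /* LSB of the converted value flags address compression */
        return (METAG_LINPC(pc) & METAG_LINPC_X2BIT) != 0;
    }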
+
+/*
+ * Flush the MMCU Table cache privately for each thread. On cores that do not
+ * support per-thread flushing it will flush all threads mapping data.
+ */
+#define TBIMCACHE_TFLUSH(Thread)                                   do {\
+    ((volatile int *)( LINSYSCFLUSH_TxMMCU_BASE            +           \
+                      (LINSYSCFLUSH_TxMMCU_STRIDE*(Thread)) ))[0] = 0; \
+                                                             } while(0)
+
+/*
+ * To flush a single linear-matched cache line from the code cache. In
+ * cases where Minim is possible the METAG_LINPC operation must be used
+ * to pre-process the address being flushed.
+ */
+#define TBIICACHE_FLUSH( pAddr ) TBIXCACHE_WD (pAddr, CACHEW_ICACHE_BIT)
+
+/* To flush a single linear-matched mapping from code/data MMU table cache */
+#define TBIMCACHE_AFLUSH( pAddr, SegType )                                \
+    TBIXCACHE_WD(pAddr, CACHEW_TLBFLUSH_BIT + (                           \
+                 ((SegType) == TBID_SEGTYPE_TEXT) ? CACHEW_ICACHE_BIT : 0 ))
+
+/*
+ * To flush translation data corresponding to a range of addresses without
+ * using TBIMCACHE_TFLUSH to flush all of this thread's translation data. It
+ * is necessary to know what stride (>= 4K) must be used to flush a specific
+ * region.
+ *
+ * For example direct mapped regions use the maximum page size (512K) which may
+ * mean that only one flush is needed to cover the sub-set of the direct
+ * mapped area used since it was setup.
+ *
+ * The function returns the stride on which flushes should be performed.
+ *
+ * If 0 is returned then the region is not subject to MMU caching, if -1 is
+ * returned then this indicates that only TBIMCACHE_TFLUSH can be used to
+ * flush the region concerned rather than TBIMCACHE_AFLUSH which this
+ * function is designed to support.
+ */
+int __TBIMMUCacheStride( const void *pStart, int Bytes );
+
+/*
+ * This function will use the above lower level functions to achieve an MMU
+ * table data flush in as optimal a fashion as possible. On a system that
+ * supports linear address based caching this function will also call the
+ * code or data cache flush functions to maintain address/data coherency.
+ *
+ * SegType should be TBID_SEGTYPE_TEXT if the address range is for code or
+ * any other value such as TBID_SEGTYPE_DATA for data. If an area is
+ * used in both ways then call this function twice; once for each.
+ */
+void __TBIMMUCacheFlush( const void *pStart, int Bytes, int SegType );
+
+/*
+ * Cached Core mode setup and flush functions allow one code and one data
+ * region of the corresponding global or local cache partition size to be
+ * locked into the corresponding cache memory. This prevents normal LRU
+ * logic discarding the code or data and avoids write-thru bandwidth in
+ * data areas. Code mappings are selected by specifying TBID_SEGTYPE_TEXT
+ * for SegType, otherwise data mappings are created.
+ * 
+ * Mode supplied should always contain the VALID bit and WINx selection data.
+ * Data areas will be mapped read-only if the WRITE bit is not added.
+ *
+ * The address returned by the Opt function will either be the same as that
+ * passed in (if optimisation cannot be supported) or the base of the new core
+ * cached region in linear address space. The returned address must be passed
+ * into the End function to remove the mapping when required. If a non-core
+ * cached memory address is passed into it the End function has no effect.
+ * Note that the region accessed MUST be flushed from the appropriate cache
+ * before the End function is called to deliver correct operation.
+ */
+void *__TBICoreCacheOpt( const void *pStart, int Bytes, int SegType, int Mode );
+void __TBICoreCacheEnd( const void *pOpt, int Bytes, int SegType );
+
+/*
+ * Optimise physical access channel and flush side effects before releasing
+ * the channel. If pStart is NULL the whole region must be flushed and this is
+ * done automatically by the channel release function if optimisation is
+ * enabled. Flushing the specific region that may have been accessed before
+ * release should optimise this process. On physically cached systems we do
+ * not flush the code/data caches; only the MMU table data needs flushing.
+ */
+void __TBIPhysOptim( int Channel, int IMode, int DMode );
+void __TBIPhysFlush( int Channel, const void *pStart, int Bytes );
+#endif
+#endif /* ifdef TBI_1_3 */
+
+#endif /* _ASM_METAG_TBX_H_ */
diff --git a/arch/metag/include/asm/tcm.h b/arch/metag/include/asm/tcm.h
new file mode 100644 (file)
index 0000000..7711c31
--- /dev/null
@@ -0,0 +1,30 @@
+#ifndef __ASM_TCM_H__
+#define __ASM_TCM_H__
+
+#include <linux/ioport.h>
+#include <linux/list.h>
+
+struct tcm_allocation {
+       struct list_head list;
+       unsigned int tag;
+       unsigned long addr;
+       unsigned long size;
+};
+
+/*
+ * TCM memory region descriptor.
+ */
+struct tcm_region {
+       unsigned int tag;
+       struct resource res;
+};
+
+#define TCM_INVALID_TAG        0xffffffff
+
+unsigned long tcm_alloc(unsigned int tag, size_t len);
+void tcm_free(unsigned int tag, unsigned long addr, size_t len);
+unsigned int tcm_lookup_tag(unsigned long p);
+
+int tcm_add_region(struct tcm_region *reg);
+
+#endif
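A hedged usage sketch of the allocator interface declared above; the tag is assumed to have been obtained from tcm_lookup_tag() or a registered region, and treating a zero return from tcm_alloc() as failure is an assumption made for illustration.

    #include <linux/errno.h>
    #include <asm/tcm.h>

    /* Sketch: borrow 256 bytes of tightly coupled memory for a while */
    static int tcm_scratch_demo(unsigned int tag)
    {
        unsigned long addr = tcm_alloc(tag, 256);

        if (!addr)                 /* assumed failure convention */
            return -ENOMEM;
        /* ... use the fast on-chip memory at 'addr' ... */
        tcm_free(tag, addr, 256);
        return 0;
    }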
diff --git a/arch/metag/include/asm/thread_info.h b/arch/metag/include/asm/thread_info.h
new file mode 100644 (file)
index 0000000..0ecd34d
--- /dev/null
@@ -0,0 +1,155 @@
+/* thread_info.h: Meta low-level thread information
+ *
+ * Copyright (C) 2002  David Howells (dhowells@redhat.com)
+ * - Incorporating suggestions made by Linus Torvalds and Dave Miller
+ *
+ * Meta port by Imagination Technologies
+ */
+
+#ifndef _ASM_THREAD_INFO_H
+#define _ASM_THREAD_INFO_H
+
+#include <linux/compiler.h>
+#include <asm/page.h>
+
+#ifndef __ASSEMBLY__
+#include <asm/processor.h>
+#endif
+
+/*
+ * low level task data that entry.S needs immediate access to
+ * - this struct should fit entirely inside of one cache line
+ * - this struct shares the supervisor stack pages
+ * - if the contents of this structure are changed, the assembly constants must
+ *   also be changed
+ */
+#ifndef __ASSEMBLY__
+
+/* This must be 8 byte aligned so we can ensure stack alignment. */
+struct thread_info {
+       struct task_struct *task;       /* main task structure */
+       struct exec_domain *exec_domain;        /* execution domain */
+       unsigned long flags;    /* low level flags */
+       unsigned long status;   /* thread-synchronous flags */
+       u32 cpu;                /* current CPU */
+       int preempt_count;      /* 0 => preemptable, <0 => BUG */
+
+       mm_segment_t addr_limit;        /* thread address space */
+       struct restart_block restart_block;
+
+       u8 supervisor_stack[0];
+};
+
+#else /* !__ASSEMBLY__ */
+
+#include <generated/asm-offsets.h>
+
+#endif
+
+#define PREEMPT_ACTIVE         0x10000000
+
+#ifdef CONFIG_4KSTACKS
+#define THREAD_SHIFT           12
+#else
+#define THREAD_SHIFT           13
+#endif
+
+#if THREAD_SHIFT >= PAGE_SHIFT
+#define THREAD_SIZE_ORDER      (THREAD_SHIFT - PAGE_SHIFT)
+#else
+#define THREAD_SIZE_ORDER      0
+#endif
+
+#define THREAD_SIZE            (PAGE_SIZE << THREAD_SIZE_ORDER)
+
+#define STACK_WARN             (THREAD_SIZE/8)
+/*
+ * macros/functions for gaining access to the thread information structure
+ */
+#ifndef __ASSEMBLY__
+
+#define INIT_THREAD_INFO(tsk)                  \
+{                                              \
+       .task           = &tsk,                 \
+       .exec_domain    = &default_exec_domain, \
+       .flags          = 0,                    \
+       .cpu            = 0,                    \
+       .preempt_count  = INIT_PREEMPT_COUNT,   \
+       .addr_limit     = KERNEL_DS,            \
+       .restart_block = {                      \
+               .fn = do_no_restart_syscall,    \
+       },                                      \
+}
+
+#define init_thread_info       (init_thread_union.thread_info)
+#define init_stack             (init_thread_union.stack)
+
+/* how to get the current stack pointer from C */
+register unsigned long current_stack_pointer asm("A0StP") __used;
+
+/* how to get the thread information struct from C */
+static inline struct thread_info *current_thread_info(void)
+{
+       return (struct thread_info *)(current_stack_pointer &
+                                     ~(THREAD_SIZE - 1));
+}
+
+#define __HAVE_ARCH_KSTACK_END
+static inline int kstack_end(void *addr)
+{
+       return addr == (void *) (((unsigned long) addr & ~(THREAD_SIZE - 1))
+                                + sizeof(struct thread_info));
+}
+
+#endif
+
+/*
+ * thread information flags
+ * - these are process state flags that various assembly files may need to
+ *   access
+ * - pending work-to-be-done flags are in LSW
+ * - other flags in MSW
+ */
+#define TIF_SYSCALL_TRACE      0       /* syscall trace active */
+#define TIF_SIGPENDING         1       /* signal pending */
+#define TIF_NEED_RESCHED       2       /* rescheduling necessary */
+#define TIF_SINGLESTEP         3       /* restore singlestep on return to user
+                                          mode */
+#define TIF_SYSCALL_AUDIT      4       /* syscall auditing active */
+#define TIF_SECCOMP            5       /* secure computing */
+#define TIF_RESTORE_SIGMASK    6       /* restore signal mask in do_signal() */
+#define TIF_NOTIFY_RESUME      7       /* callback before returning to user */
+#define TIF_POLLING_NRFLAG      8      /* true if poll_idle() is polling
+                                          TIF_NEED_RESCHED */
+#define TIF_MEMDIE             9       /* is terminating due to OOM killer */
+#define TIF_SYSCALL_TRACEPOINT  10     /* syscall tracepoint instrumentation */
+
+
+#define _TIF_SYSCALL_TRACE     (1<<TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING                (1<<TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP                (1<<TIF_SINGLESTEP)
+#define _TIF_SYSCALL_AUDIT     (1<<TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP           (1<<TIF_SECCOMP)
+#define _TIF_NOTIFY_RESUME     (1<<TIF_NOTIFY_RESUME)
+#define _TIF_RESTORE_SIGMASK   (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_SYSCALL_TRACEPOINT        (1<<TIF_SYSCALL_TRACEPOINT)
+
+/* work to do in syscall trace */
+#define _TIF_WORK_SYSCALL_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \
+                                _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
+                                _TIF_SYSCALL_TRACEPOINT)
+
+/* work to do on any return to u-space */
+#define _TIF_ALLWORK_MASK      (_TIF_SYSCALL_TRACE | _TIF_SIGPENDING      | \
+                                _TIF_NEED_RESCHED  | _TIF_SYSCALL_AUDIT   | \
+                                _TIF_SINGLESTEP    | _TIF_RESTORE_SIGMASK | \
+                                _TIF_NOTIFY_RESUME)
+
+/* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK         (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \
+                                _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP))
+
+#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+
+#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/metag/include/asm/tlb.h b/arch/metag/include/asm/tlb.h
new file mode 100644 (file)
index 0000000..048282f
--- /dev/null
@@ -0,0 +1,36 @@
+#ifndef __ASM_METAG_TLB_H
+#define __ASM_METAG_TLB_H
+
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+/* Note, read http://lkml.org/lkml/2004/1/15/6 */
+
+#ifdef CONFIG_METAG_META12
+
+#define tlb_start_vma(tlb, vma)                                                      \
+       do {                                                                  \
+               if (!tlb->fullmm)                                             \
+                       flush_cache_range(vma, vma->vm_start, vma->vm_end);   \
+       } while (0)
+
+#define tlb_end_vma(tlb, vma)                                                \
+       do {                                                                  \
+               if (!tlb->fullmm)                                             \
+                       flush_tlb_range(vma, vma->vm_start, vma->vm_end);     \
+       } while (0)
+
+
+#else
+
+#define tlb_start_vma(tlb, vma)                        do { } while (0)
+#define tlb_end_vma(tlb, vma)                  do { } while (0)
+
+#endif
+
+#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0)
+#define tlb_flush(tlb)                         flush_tlb_mm((tlb)->mm)
+
+#include <asm-generic/tlb.h>
+
+#endif
diff --git a/arch/metag/include/asm/tlbflush.h b/arch/metag/include/asm/tlbflush.h
new file mode 100644 (file)
index 0000000..566acf9
--- /dev/null
@@ -0,0 +1,77 @@
+#ifndef __ASM_METAG_TLBFLUSH_H
+#define __ASM_METAG_TLBFLUSH_H
+
+#include <linux/io.h>
+#include <linux/sched.h>
+#include <asm/metag_mem.h>
+#include <asm/pgalloc.h>
+
+/*
+ * TLB flushing:
+ *
+ *  - flush_tlb() flushes the current mm struct TLBs
+ *  - flush_tlb_all() flushes all processes' TLBs
+ *  - flush_tlb_mm(mm) flushes the specified mm context's TLBs
+ *  - flush_tlb_page(vma, vmaddr) flushes one page
+ *  - flush_tlb_range(mm, start, end) flushes a range of pages
+ *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *  - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
+ *
+ * FIXME: Meta 2 can flush single TLB entries.
+ *
+ */
+
+#if defined(CONFIG_METAG_META21) && !defined(CONFIG_SMP)
+static inline void __flush_tlb(void)
+{
+       /* flush TLB entries for just the current hardware thread */
+       int thread = hard_processor_id();
+       metag_out32(0, (LINSYSCFLUSH_TxMMCU_BASE +
+                       LINSYSCFLUSH_TxMMCU_STRIDE * thread));
+}
+#else
+static inline void __flush_tlb(void)
+{
+       /* flush TLB entries for all hardware threads */
+       metag_out32(0, LINSYSCFLUSH_MMCU);
+}
+#endif /* defined(CONFIG_METAG_META21) && !defined(CONFIG_SMP) */
+
+#define flush_tlb() __flush_tlb()
+
+#define flush_tlb_all() __flush_tlb()
+
+#define local_flush_tlb_all() __flush_tlb()
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+       if (mm == current->active_mm)
+               __flush_tlb();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+                                 unsigned long addr)
+{
+       flush_tlb_mm(vma->vm_mm);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+                                  unsigned long start, unsigned long end)
+{
+       flush_tlb_mm(vma->vm_mm);
+}
+
+static inline void flush_tlb_pgtables(struct mm_struct *mm,
+                                     unsigned long start, unsigned long end)
+{
+       flush_tlb_mm(mm);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+                                         unsigned long end)
+{
+       flush_tlb_all();
+}
+
+#endif /* __ASM_METAG_TLBFLUSH_H */
+
diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h
new file mode 100644 (file)
index 0000000..23f5118
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef _ASM_METAG_TOPOLOGY_H
+#define _ASM_METAG_TOPOLOGY_H
+
+#ifdef CONFIG_NUMA
+
+/* sched_domains SD_NODE_INIT for Meta machines */
+#define SD_NODE_INIT (struct sched_domain) {           \
+       .parent                 = NULL,                 \
+       .child                  = NULL,                 \
+       .groups                 = NULL,                 \
+       .min_interval           = 8,                    \
+       .max_interval           = 32,                   \
+       .busy_factor            = 32,                   \
+       .imbalance_pct          = 125,                  \
+       .cache_nice_tries       = 2,                    \
+       .busy_idx               = 3,                    \
+       .idle_idx               = 2,                    \
+       .newidle_idx            = 0,                    \
+       .wake_idx               = 0,                    \
+       .forkexec_idx           = 0,                    \
+       .flags                  = SD_LOAD_BALANCE       \
+                               | SD_BALANCE_FORK       \
+                               | SD_BALANCE_EXEC       \
+                               | SD_BALANCE_NEWIDLE    \
+                               | SD_SERIALIZE,         \
+       .last_balance           = jiffies,              \
+       .balance_interval       = 1,                    \
+       .nr_balance_failed      = 0,                    \
+}
+
+#define cpu_to_node(cpu)       ((void)(cpu), 0)
+#define parent_node(node)      ((void)(node), 0)
+
+#define cpumask_of_node(node)  ((void)node, cpu_online_mask)
+
+#define pcibus_to_node(bus)    ((void)(bus), -1)
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
+                                       cpu_all_mask : \
+                                       cpumask_of_node(pcibus_to_node(bus)))
+
+#endif
+
+#define mc_capable()    (1)
+
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
+
+extern cpumask_t cpu_core_map[NR_CPUS];
+
+#define topology_core_cpumask(cpu)     (&cpu_core_map[cpu])
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_METAG_TOPOLOGY_H */
diff --git a/arch/metag/include/asm/traps.h b/arch/metag/include/asm/traps.h
new file mode 100644 (file)
index 0000000..ac80874
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ *  Copyright (C) 2005,2008 Imagination Technologies
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef _METAG_TBIVECTORS_H
+#define _METAG_TBIVECTORS_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/tbx.h>
+
+typedef TBIRES (*kick_irq_func_t)(TBIRES, int, int, int, PTBI, int *);
+
+extern TBIRES kick_handler(TBIRES, int, int, int, PTBI);
+struct kick_irq_handler {
+       struct list_head list;
+       kick_irq_func_t func;
+};
+
+extern void kick_register_func(struct kick_irq_handler *);
+extern void kick_unregister_func(struct kick_irq_handler *);
+
+extern void head_end(TBIRES, unsigned long);
+extern void restart_critical_section(TBIRES State);
+extern TBIRES tail_end_sys(TBIRES, int, int *);
+static inline TBIRES tail_end(TBIRES state)
+{
+       return tail_end_sys(state, -1, NULL);
+}
+
+DECLARE_PER_CPU(PTBI, pTBI);
+extern PTBI pTBI_get(unsigned int);
+
+extern int ret_from_fork(TBIRES arg);
+
+extern int do_page_fault(struct pt_regs *regs, unsigned long address,
+                        unsigned int write_access, unsigned int trapno);
+
+extern TBIRES __TBIUnExpXXX(TBIRES State, int SigNum, int Triggers, int Inst,
+                           PTBI pTBI);
+
+#endif
+
+#endif /* _METAG_TBIVECTORS_H */
diff --git a/arch/metag/include/asm/uaccess.h b/arch/metag/include/asm/uaccess.h
new file mode 100644 (file)
index 0000000..0748b0a
--- /dev/null
@@ -0,0 +1,241 @@
+#ifndef __METAG_UACCESS_H
+#define __METAG_UACCESS_H
+
+/*
+ * User space memory access functions
+ */
+#include <linux/sched.h>
+
+#define VERIFY_READ    0
+#define VERIFY_WRITE   1
+
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s)  ((mm_segment_t) { (s) })
+
+#define KERNEL_DS       MAKE_MM_SEG(0xFFFFFFFF)
+#define USER_DS                MAKE_MM_SEG(PAGE_OFFSET)
+
+#define get_ds()       (KERNEL_DS)
+#define get_fs()        (current_thread_info()->addr_limit)
+#define set_fs(x)       (current_thread_info()->addr_limit = (x))
+
+#define segment_eq(a, b)       ((a).seg == (b).seg)
+
+#define __kernel_ok (segment_eq(get_fs(), KERNEL_DS))
+/*
+ * Explicitly allow NULL pointers here. Parts of the kernel such
+ * as readv/writev use access_ok to validate pointers, but want
+ * to allow NULL pointers for various reasons. NULL pointers are
+ * safe to allow through because the first page is not mappable on
+ * Meta.
+ *
+ * We also wish to avoid letting user code access the system area
+ * and the kernel half of the address space.
+ */
+#define __user_bad(addr, size) (((addr) > 0 && (addr) < META_MEMORY_BASE) || \
+                               ((addr) > PAGE_OFFSET &&                \
+                                (addr) < LINCORE_BASE))
+
+static inline int __access_ok(unsigned long addr, unsigned long size)
+{
+       return __kernel_ok || !__user_bad(addr, size);
+}
+
+#define access_ok(type, addr, size) __access_ok((unsigned long)(addr), \
+                                               (unsigned long)(size))
+
+static inline int verify_area(int type, const void *addr, unsigned long size)
+{
+       return access_ok(type, addr, size) ? 0 : -EFAULT;
+}
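A hedged sketch of how a caller would use access_ok() before touching user memory; the wrapper function and its error convention follow common kernel practice rather than anything mandated by this header.

    /* Sketch: validate a user buffer before writing to it */
    static long check_user_buffer(void __user *ubuf, unsigned long len)
    {
        if (!access_ok(VERIFY_WRITE, ubuf, len))
            return -EFAULT;   /* outside the allowed user address range */
        return 0;
    }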
+
+/*
+ * The exception table consists of pairs of addresses: the first is the
+ * address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue.  No registers are
+ * modified, so it is entirely up to the continuation code to figure out
+ * what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+struct exception_table_entry {
+       unsigned long insn, fixup;
+};
+
+extern int fixup_exception(struct pt_regs *regs);
+
+/*
+ * These are the main single-value transfer routines.  They automatically
+ * use the right size if we just have the right pointer type.
+ */
+
+#define put_user(x, ptr) \
+       __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+#define __put_user(x, ptr) \
+       __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
+
+extern void __put_user_bad(void);
+
+#define __put_user_nocheck(x, ptr, size)               \
+({                                                      \
+       long __pu_err;                                  \
+       __put_user_size((x), (ptr), (size), __pu_err);  \
+       __pu_err;                                       \
+})
+
+#define __put_user_check(x, ptr, size)                         \
+({                                                              \
+       long __pu_err = -EFAULT;                                \
+       __typeof__(*(ptr)) __user *__pu_addr = (ptr);           \
+       if (access_ok(VERIFY_WRITE, __pu_addr, size))           \
+               __put_user_size((x), __pu_addr, (size), __pu_err);      \
+       __pu_err;                                               \
+})
+
+extern long __put_user_asm_b(unsigned int x, void __user *addr);
+extern long __put_user_asm_w(unsigned int x, void __user *addr);
+extern long __put_user_asm_d(unsigned int x, void __user *addr);
+extern long __put_user_asm_l(unsigned long long x, void __user *addr);
+
+#define __put_user_size(x, ptr, size, retval)                  \
+do {                                                            \
+       retval = 0;                                             \
+       switch (size) {                                         \
+       case 1:                                                         \
+               retval = __put_user_asm_b((unsigned int)x, ptr); break; \
+       case 2:                                                         \
+               retval = __put_user_asm_w((unsigned int)x, ptr); break; \
+       case 4:                                                         \
+               retval = __put_user_asm_d((unsigned int)x, ptr); break; \
+       case 8:                                                         \
+               retval = __put_user_asm_l((unsigned long long)x, ptr); break; \
+       default:                                                        \
+               __put_user_bad();                                       \
+       }                                                               \
+} while (0)
+
+#define get_user(x, ptr) \
+       __get_user_check((x), (ptr), sizeof(*(ptr)))
+#define __get_user(x, ptr) \
+       __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
+
+extern long __get_user_bad(void);
+
+#define __get_user_nocheck(x, ptr, size)                       \
+({                                                              \
+       long __gu_err, __gu_val;                                \
+       __get_user_size(__gu_val, (ptr), (size), __gu_err);     \
+       (x) = (__typeof__(*(ptr)))__gu_val;                     \
+       __gu_err;                                               \
+})
+
+#define __get_user_check(x, ptr, size)                                 \
+({                                                                      \
+       long __gu_err = -EFAULT, __gu_val = 0;                          \
+       const __typeof__(*(ptr)) __user *__gu_addr = (ptr);             \
+       if (access_ok(VERIFY_READ, __gu_addr, size))                    \
+               __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
+       (x) = (__typeof__(*(ptr)))__gu_val;                             \
+       __gu_err;                                                       \
+})
+
+extern unsigned char __get_user_asm_b(const void __user *addr, long *err);
+extern unsigned short __get_user_asm_w(const void __user *addr, long *err);
+extern unsigned int __get_user_asm_d(const void __user *addr, long *err);
+
+#define __get_user_size(x, ptr, size, retval)                  \
+do {                                                            \
+       retval = 0;                                             \
+       switch (size) {                                         \
+       case 1:                                                 \
+               x = __get_user_asm_b(ptr, &retval); break;      \
+       case 2:                                                 \
+               x = __get_user_asm_w(ptr, &retval); break;      \
+       case 4:                                                 \
+               x = __get_user_asm_d(ptr, &retval); break;      \
+       default:                                                \
+               (x) = __get_user_bad();                         \
+       }                                                       \
+} while (0)
+
+/*
+ * Copy a null terminated string from userspace.
+ *
+ * Must return:
+ * -EFAULT             for an exception
+ * count               if we hit the buffer limit
+ * bytes copied                if we hit a null byte
+ * (without the null byte)
+ */
+
+extern long __must_check __strncpy_from_user(char *dst, const char __user *src,
+                                            long count);
+
+#define strncpy_from_user(dst, src, count) __strncpy_from_user(dst, src, count)
+
+/*
+ * Return the size of a string (including the ending 0)
+ *
+ * Return 0 on exception, a value greater than N if too long
+ */
+extern long __must_check strnlen_user(const char __user *src, long count);
+
+#define strlen_user(str) strnlen_user(str, 32767)
+
+extern unsigned long __must_check __copy_user_zeroing(void *to,
+                                                     const void __user *from,
+                                                     unsigned long n);
+
+static inline unsigned long
+copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+       if (access_ok(VERIFY_READ, from, n))
+               return __copy_user_zeroing(to, from, n);
+       return n;
+}
+
+#define __copy_from_user(to, from, n) __copy_user_zeroing(to, from, n)
+#define __copy_from_user_inatomic __copy_from_user
+
+extern unsigned long __must_check __copy_user(void __user *to,
+                                             const void *from,
+                                             unsigned long n);
+
+static inline unsigned long copy_to_user(void __user *to, const void *from,
+                                        unsigned long n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+               return __copy_user(to, from, n);
+       return n;
+}
+
+#define __copy_to_user(to, from, n) __copy_user(to, from, n)
+#define __copy_to_user_inatomic __copy_to_user
+
+/*
+ * Zero Userspace
+ */
+
+extern unsigned long __must_check __do_clear_user(void __user *to,
+                                                 unsigned long n);
+
+static inline unsigned long clear_user(void __user *to, unsigned long n)
+{
+       if (access_ok(VERIFY_WRITE, to, n))
+               return __do_clear_user(to, n);
+       return n;
+}
+
+#define __clear_user(to, n)            __do_clear_user(to, n)
+
+#endif /* _METAG_UACCESS_H */
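The copy_to_user()/copy_from_user() helpers above gate every transfer through access_ok() and return the number of bytes left uncopied on failure. A minimal sketch of a hypothetical caller, not part of this commit (the handler and buffer names are invented for illustration):

/* Illustrative only: a driver read handler built on the helpers above. */
static ssize_t example_read(struct file *file, char __user *buf,
                            size_t count, loff_t *ppos)
{
        char kbuf[64] = "hello from the kernel\n";
        size_t n = min(count, sizeof(kbuf));

        /* copy_to_user() returns the number of bytes it could NOT copy. */
        if (copy_to_user(buf, kbuf, n))
                return -EFAULT;
        return n;
}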
diff --git a/arch/metag/include/asm/unistd.h b/arch/metag/include/asm/unistd.h
new file mode 100644 (file)
index 0000000..32955a1
--- /dev/null
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <uapi/asm/unistd.h>
+
+#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/metag/include/asm/user_gateway.h b/arch/metag/include/asm/user_gateway.h
new file mode 100644 (file)
index 0000000..e404c09
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2010 Imagination Technologies
+ */
+
+#ifndef __ASM_METAG_USER_GATEWAY_H
+#define __ASM_METAG_USER_GATEWAY_H
+
+#include <asm/page.h>
+
+/* Page of kernel code accessible to userspace. */
+#define USER_GATEWAY_PAGE      0x6ffff000
+/* Offset of TLS pointer array in gateway page. */
+#define USER_GATEWAY_TLS       0x100
+
+#ifndef __ASSEMBLY__
+
+extern char __user_gateway_start;
+extern char __user_gateway_end;
+
+/* Kernel mapping of the gateway page. */
+extern void *gateway_page;
+
+static inline void set_gateway_tls(void __user *tls_ptr)
+{
+       void **gateway_tls = (void **)(gateway_page + USER_GATEWAY_TLS +
+                                      hard_processor_id() * 4);
+
+       *gateway_tls = (__force void *)tls_ptr;
+#ifdef CONFIG_METAG_META12
+       /* Avoid cache aliases on virtually tagged cache. */
+       __builtin_dcache_flush((void *)USER_GATEWAY_PAGE + USER_GATEWAY_TLS +
+                                      hard_processor_id() * sizeof(void *));
+#endif
+}
+
+extern int __kuser_get_tls(void);
+extern char *__kuser_get_tls_end[];
+
+extern int __kuser_cmpxchg(int, int, unsigned long *);
+extern char *__kuser_cmpxchg_end[];
+
+#endif
+
+#endif
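For illustration (not part of this commit), the per-thread TLS pointer slot addressed by set_gateway_tls() above sits at a fixed offset inside the gateway page:

/* Illustrative only: user-visible address of hardware thread N's TLS slot. */
/*   USER_GATEWAY_PAGE + USER_GATEWAY_TLS + N * sizeof(void *)              */
/*   e.g. thread 2:  0x6ffff000 + 0x100 + 2 * 4 = 0x6ffff108                */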
diff --git a/arch/metag/include/uapi/asm/Kbuild b/arch/metag/include/uapi/asm/Kbuild
new file mode 100644 (file)
index 0000000..876c71f
--- /dev/null
@@ -0,0 +1,13 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+header-y += byteorder.h
+header-y += ptrace.h
+header-y += resource.h
+header-y += sigcontext.h
+header-y += siginfo.h
+header-y += swab.h
+header-y += unistd.h
+
+generic-y += mman.h
+generic-y += setup.h
diff --git a/arch/metag/include/uapi/asm/byteorder.h b/arch/metag/include/uapi/asm/byteorder.h
new file mode 100644 (file)
index 0000000..9558416
--- /dev/null
@@ -0,0 +1 @@
+#include <linux/byteorder/little_endian.h>
diff --git a/arch/metag/include/uapi/asm/ptrace.h b/arch/metag/include/uapi/asm/ptrace.h
new file mode 100644 (file)
index 0000000..45d9780
--- /dev/null
@@ -0,0 +1,113 @@
+#ifndef _UAPI_METAG_PTRACE_H
+#define _UAPI_METAG_PTRACE_H
+
+#ifndef __ASSEMBLY__
+
+/*
+ * These are the layouts of the regsets returned by the GETREGSET ptrace call
+ */
+
+/* user_gp_regs::status */
+
+/* CBMarker bit (indicates catch state / catch replay) */
+#define USER_GP_REGS_STATUS_CATCH_BIT          (1 << 22)
+#define USER_GP_REGS_STATUS_CATCH_S            22
+/* LSM_STEP field (load/store multiple step) */
+#define USER_GP_REGS_STATUS_LSM_STEP_BITS      (0x7 << 8)
+#define USER_GP_REGS_STATUS_LSM_STEP_S         8
+/* SCC bit (indicates split 16x16 condition flags) */
+#define USER_GP_REGS_STATUS_SCC_BIT            (1 << 4)
+#define USER_GP_REGS_STATUS_SCC_S              4
+
+/* normal condition flags */
+/* CF_Z bit (Zero flag) */
+#define USER_GP_REGS_STATUS_CF_Z_BIT           (1 << 3)
+#define USER_GP_REGS_STATUS_CF_Z_S             3
+/* CF_N bit (Negative flag) */
+#define USER_GP_REGS_STATUS_CF_N_BIT           (1 << 2)
+#define USER_GP_REGS_STATUS_CF_N_S             2
+/* CF_V bit (oVerflow flag) */
+#define USER_GP_REGS_STATUS_CF_V_BIT           (1 << 1)
+#define USER_GP_REGS_STATUS_CF_V_S             1
+/* CF_C bit (Carry flag) */
+#define USER_GP_REGS_STATUS_CF_C_BIT           (1 << 0)
+#define USER_GP_REGS_STATUS_CF_C_S             0
+
+/* split 16x16 condition flags */
+/* SCF_LZ bit (Low Zero flag) */
+#define USER_GP_REGS_STATUS_SCF_LZ_BIT         (1 << 3)
+#define USER_GP_REGS_STATUS_SCF_LZ_S           3
+/* SCF_HZ bit (High Zero flag) */
+#define USER_GP_REGS_STATUS_SCF_HZ_BIT         (1 << 2)
+#define USER_GP_REGS_STATUS_SCF_HZ_S           2
+/* SCF_HC bit (High Carry flag) */
+#define USER_GP_REGS_STATUS_SCF_HC_BIT         (1 << 1)
+#define USER_GP_REGS_STATUS_SCF_HC_S           1
+/* SCF_LC bit (Low Carry flag) */
+#define USER_GP_REGS_STATUS_SCF_LC_BIT         (1 << 0)
+#define USER_GP_REGS_STATUS_SCF_LC_S           0
+
+/**
+ * struct user_gp_regs - User general purpose registers
+ * @dx:                GP data unit regs (dx[reg][unit] = D{unit:0-1}.{reg:0-7})
+ * @ax:                GP address unit regs (ax[reg][unit] = A{unit:0-1}.{reg:0-3})
+ * @pc:                PC register
+ * @status:    TXSTATUS register (condition flags, LSM_STEP etc)
+ * @rpt:       TXRPT registers (branch repeat counter)
+ * @bpobits:   TXBPOBITS register ("branch prediction other" bits)
+ * @mode:      TXMODE register
+ * @_pad1:     Reserved padding to make sizeof obviously 64bit aligned
+ *
+ * This is the user-visible general purpose register state structure.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_PRSTATUS.
+ *
+ * It is also used in the signal context.
+ */
+struct user_gp_regs {
+       unsigned long dx[8][2];
+       unsigned long ax[4][2];
+       unsigned long pc;
+       unsigned long status;
+       unsigned long rpt;
+       unsigned long bpobits;
+       unsigned long mode;
+       unsigned long _pad1;
+};
+
+/**
+ * struct user_cb_regs - User catch buffer registers
+ * @flags:     TXCATCH0 register (fault flags)
+ * @addr:      TXCATCH1 register (fault address)
+ * @data:      TXCATCH2 and TXCATCH3 registers (low and high data word)
+ *
+ * This is the user-visible catch buffer register state structure containing
+ * information about a failed memory access, and allowing the access to be
+ * modified and replayed.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_METAG_CBUF.
+ */
+struct user_cb_regs {
+       unsigned long flags;
+       unsigned long addr;
+       unsigned long long data;
+};
+
+/**
+ * struct user_rp_state - User read pipeline state
+ * @entries:   Read pipeline entries
+ * @mask:      Mask of valid pipeline entries (RPMask from TXDIVTIME register)
+ *
+ * This is the user-visible read pipeline state structure containing the entries
+ * currently in the read pipeline and the mask of valid entries.
+ *
+ * It can be accessed through PTRACE_GETREGSET with NT_METAG_RPIPE.
+ */
+struct user_rp_state {
+       unsigned long long entries[6];
+       unsigned long mask;
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_METAG_PTRACE_H */
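A hedged userspace sketch of fetching struct user_gp_regs with PTRACE_GETREGSET and NT_PRSTATUS, as the kernel-doc above describes (illustrative only; the tracee is assumed to be stopped and error handling is omitted):

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <elf.h>            /* NT_PRSTATUS */
#include <asm/ptrace.h>     /* struct user_gp_regs */

/* Read the general purpose register set of a traced, stopped child. */
static long read_gp_regs(pid_t pid, struct user_gp_regs *regs)
{
        struct iovec iov = { .iov_base = regs, .iov_len = sizeof(*regs) };

        return ptrace(PTRACE_GETREGSET, pid, (void *)NT_PRSTATUS, &iov);
}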
diff --git a/arch/metag/include/uapi/asm/resource.h b/arch/metag/include/uapi/asm/resource.h
new file mode 100644 (file)
index 0000000..526d23c
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _UAPI_METAG_RESOURCE_H
+#define _UAPI_METAG_RESOURCE_H
+
+#define _STK_LIM_MAX    (1 << 28)
+#include <asm-generic/resource.h>
+
+#endif /* _UAPI_METAG_RESOURCE_H */
diff --git a/arch/metag/include/uapi/asm/sigcontext.h b/arch/metag/include/uapi/asm/sigcontext.h
new file mode 100644 (file)
index 0000000..ef79a91
--- /dev/null
@@ -0,0 +1,31 @@
+#ifndef _ASM_METAG_SIGCONTEXT_H
+#define _ASM_METAG_SIGCONTEXT_H
+
+#include <asm/ptrace.h>
+
+/*
+ * In a sigcontext structure we need to store the active state of the
+ * user process so that it does not get trashed when we call the signal
+ * handler. That's not really the same as a user context that we are
+ * going to store on syscall etc.
+ */
+struct sigcontext {
+       struct user_gp_regs regs;       /* needs to be first */
+
+       /*
+        * Catch registers describing a memory fault.
+        * If USER_GP_REGS_STATUS_CATCH_BIT is set in regs.status then catch
+        * buffers have been saved and will be replayed on sigreturn.
+        * Clear that bit to discard the catch state instead of replaying it.
+        */
+       struct user_cb_regs cb;
+
+       /*
+        * Read pipeline state. This will get restored on sigreturn.
+        */
+       struct user_rp_state rp;
+
+       unsigned long oldmask;
+};
+
+#endif
diff --git a/arch/metag/include/uapi/asm/siginfo.h b/arch/metag/include/uapi/asm/siginfo.h
new file mode 100644 (file)
index 0000000..b2e0c8b
--- /dev/null
@@ -0,0 +1,8 @@
+#ifndef _METAG_SIGINFO_H
+#define _METAG_SIGINFO_H
+
+#define __ARCH_SI_TRAPNO
+
+#include <asm-generic/siginfo.h>
+
+#endif
diff --git a/arch/metag/include/uapi/asm/swab.h b/arch/metag/include/uapi/asm/swab.h
new file mode 100644 (file)
index 0000000..1076b3a
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef __ASM_METAG_SWAB_H
+#define __ASM_METAG_SWAB_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm-generic/swab.h>
+
+static inline __attribute_const__ __u16 __arch_swab16(__u16 x)
+{
+       return __builtin_metag_bswaps(x);
+}
+#define __arch_swab16 __arch_swab16
+
+static inline __attribute_const__ __u32 __arch_swab32(__u32 x)
+{
+       return __builtin_metag_bswap(x);
+}
+#define __arch_swab32 __arch_swab32
+
+static inline __attribute_const__ __u64 __arch_swab64(__u64 x)
+{
+       return __builtin_metag_bswapll(x);
+}
+#define __arch_swab64 __arch_swab64
+
+#endif /* __ASM_METAG_SWAB_H */
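For illustration (not part of this commit), the arch overrides above perform the usual byte reversal:

/* Illustrative only: expected results of the swab helpers above. */
/*   __arch_swab16(0x1234)             == 0x3412               */
/*   __arch_swab32(0x12345678)         == 0x78563412           */
/*   __arch_swab64(0x0102030405060708) == 0x0807060504030201   */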
diff --git a/arch/metag/include/uapi/asm/unistd.h b/arch/metag/include/uapi/asm/unistd.h
new file mode 100644 (file)
index 0000000..b80b8e8
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/* Use the standard ABI for syscalls. */
+#include <asm-generic/unistd.h>
+
+/* metag-specific syscalls. */
+#define __NR_metag_setglobalbit                (__NR_arch_specific_syscall + 1)
+__SYSCALL(__NR_metag_setglobalbit, sys_metag_setglobalbit)
+#define __NR_metag_set_fpu_flags       (__NR_arch_specific_syscall + 2)
+__SYSCALL(__NR_metag_set_fpu_flags, sys_metag_set_fpu_flags)
+#define __NR_metag_set_tls             (__NR_arch_specific_syscall + 3)
+__SYSCALL(__NR_metag_set_tls, sys_metag_set_tls)
+#define __NR_metag_get_tls             (__NR_arch_specific_syscall + 4)
+__SYSCALL(__NR_metag_get_tls, sys_metag_get_tls)
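Illustrative only (not part of this commit): userspace would reach the arch-specific numbers above through syscall(2), for example:

#include <unistd.h>
#include <sys/syscall.h>
#include <asm/unistd.h>     /* __NR_metag_set_tls from the header above */

/* Hypothetical wrapper around the metag_set_tls syscall. */
static long metag_set_tls(void *tls)
{
        return syscall(__NR_metag_set_tls, tls);
}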
diff --git a/arch/metag/kernel/.gitignore b/arch/metag/kernel/.gitignore
new file mode 100644 (file)
index 0000000..c5f676c
--- /dev/null
@@ -0,0 +1 @@
+vmlinux.lds
diff --git a/arch/metag/kernel/Makefile b/arch/metag/kernel/Makefile
new file mode 100644 (file)
index 0000000..d7675f4
--- /dev/null
@@ -0,0 +1,39 @@
+#
+# Makefile for the Linux/Meta kernel.
+#
+
+extra-y        += head.o
+extra-y        += vmlinux.lds
+
+obj-y  += cachepart.o
+obj-y  += clock.o
+obj-y  += core_reg.o
+obj-y  += devtree.o
+obj-y  += dma.o
+obj-y  += irq.o
+obj-y  += kick.o
+obj-y  += machines.o
+obj-y  += process.o
+obj-y  += ptrace.o
+obj-y  += setup.o
+obj-y  += signal.o
+obj-y  += stacktrace.o
+obj-y  += sys_metag.o
+obj-y  += tbiunexp.o
+obj-y  += time.o
+obj-y  += topology.o
+obj-y  += traps.o
+obj-y  += user_gateway.o
+
+obj-$(CONFIG_PERF_EVENTS)              += perf/
+
+obj-$(CONFIG_METAG_COREMEM)            += coremem.o
+obj-$(CONFIG_METAG_DA)                 += da.o
+obj-$(CONFIG_DYNAMIC_FTRACE)           += ftrace.o
+obj-$(CONFIG_FUNCTION_TRACER)          += ftrace_stub.o
+obj-$(CONFIG_MODULES)                  += metag_ksyms.o
+obj-$(CONFIG_MODULES)                  += module.o
+obj-$(CONFIG_PERF_EVENTS)              += perf_callchain.o
+obj-$(CONFIG_SMP)                      += smp.o
+obj-$(CONFIG_METAG_SUSPEND_MEM)                += suspend.o
+obj-$(CONFIG_METAG_USER_TCM)           += tcm.o
diff --git a/arch/metag/kernel/asm-offsets.c b/arch/metag/kernel/asm-offsets.c
new file mode 100644 (file)
index 0000000..bfc9205
--- /dev/null
@@ -0,0 +1,14 @@
+/*
+ * This program is used to generate definitions needed by
+ * assembly language modules.
+ *
+ */
+
+#include <linux/kbuild.h>
+#include <linux/thread_info.h>
+
+int main(void)
+{
+       DEFINE(THREAD_INFO_SIZE, sizeof(struct thread_info));
+       return 0;
+}
diff --git a/arch/metag/kernel/cachepart.c b/arch/metag/kernel/cachepart.c
new file mode 100644 (file)
index 0000000..3a589df
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * Meta cache partition manipulation.
+ *
+ * Copyright 2010 Imagination Technologies Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/errno.h>
+#include <asm/processor.h>
+#include <asm/cachepart.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+
+#define SYSC_DCPART(n) (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
+#define SYSC_ICPART(n) (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))
+
+#define CACHE_ASSOCIATIVITY 4 /* 4 way set-associative */
+#define ICACHE 0
+#define DCACHE 1
+
+/* The CORE_CONFIG2 register is not available on Meta 1 */
+#ifdef CONFIG_METAG_META21
+unsigned int get_dcache_size(void)
+{
+       unsigned int config2 = metag_in32(METAC_CORE_CONFIG2);
+       return 0x1000 << ((config2 & METAC_CORECFG2_DCSZ_BITS)
+                               >> METAC_CORECFG2_DCSZ_S);
+}
+
+unsigned int get_icache_size(void)
+{
+       unsigned int config2 = metag_in32(METAC_CORE_CONFIG2);
+       return 0x1000 << ((config2 & METAC_CORE_C2ICSZ_BITS)
+                               >> METAC_CORE_C2ICSZ_S);
+}
+
+unsigned int get_global_dcache_size(void)
+{
+       unsigned int cpart = metag_in32(SYSC_DCPART(hard_processor_id()));
+       unsigned int temp = cpart & SYSC_xCPARTG_AND_BITS;
+       return (get_dcache_size() * ((temp >> SYSC_xCPARTG_AND_S) + 1)) >> 4;
+}
+
+unsigned int get_global_icache_size(void)
+{
+       unsigned int cpart = metag_in32(SYSC_ICPART(hard_processor_id()));
+       unsigned int temp = cpart & SYSC_xCPARTG_AND_BITS;
+       return (get_icache_size() * ((temp >> SYSC_xCPARTG_AND_S) + 1)) >> 4;
+}
+
+static int get_thread_cache_size(unsigned int cache, int thread_id)
+{
+       unsigned int cache_size;
+       unsigned int t_cache_part;
+       unsigned int isEnabled;
+       unsigned int offset = 0;
+       isEnabled = (cache == DCACHE ? metag_in32(MMCU_DCACHE_CTRL_ADDR) & 0x1 :
+               metag_in32(MMCU_ICACHE_CTRL_ADDR) & 0x1);
+       if (!isEnabled)
+               return 0;
+#if PAGE_OFFSET >= LINGLOBAL_BASE
+       /* Checking for global cache */
+       cache_size = (cache == DCACHE ? get_global_dcache_size() :
+               get_global_icache_size());
+       offset = 8;
+#else
+       cache_size = (cache == DCACHE ? get_dcache_size() :
+               get_icache_size());
+#endif
+       t_cache_part = (cache == DCACHE ?
+               (metag_in32(SYSC_DCPART(thread_id)) >> offset) & 0xF :
+               (metag_in32(SYSC_ICPART(thread_id)) >> offset) & 0xF);
+       switch (t_cache_part) {
+       case 0xF:
+               return cache_size;
+       case 0x7:
+               return cache_size / 2;
+       case 0x3:
+               return cache_size / 4;
+       case 0x1:
+               return cache_size / 8;
+       case 0:
+               return cache_size / 16;
+       }
+       return -1;
+}
+
+void check_for_cache_aliasing(int thread_id)
+{
+       int thread_cache_size;
+       unsigned int cache_type;
+       for (cache_type = ICACHE; cache_type <= DCACHE; cache_type++) {
+               thread_cache_size =
+                               get_thread_cache_size(cache_type, thread_id);
+               if (thread_cache_size < 0)
+                       pr_emerg("Can't read %s cache size\n",
+                                cache_type ? "DCACHE" : "ICACHE");
+               else if (thread_cache_size == 0)
+                       /* Cache is off. No need to check for aliasing */
+                       continue;
+               if (thread_cache_size / CACHE_ASSOCIATIVITY > PAGE_SIZE) {
+                       pr_emerg("Cache aliasing detected in %s on Thread %d\n",
+                                cache_type ? "DCACHE" : "ICACHE", thread_id);
+                       pr_warn("Total %s size: %u bytes\n",
+                               cache_type ? "DCACHE" : "ICACHE",
+                               cache_type ? get_dcache_size()
+                               : get_icache_size());
+                       pr_warn("Thread %s size: %d bytes\n",
+                               cache_type ? "DCACHE" : "ICACHE",
+                               thread_cache_size);
+                       pr_warn("Page Size: %lu bytes\n", PAGE_SIZE);
+               }
+       }
+}
+
+#else
+
+void check_for_cache_aliasing(int thread_id)
+{
+       return;
+}
+
+#endif
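A worked example of the aliasing test in check_for_cache_aliasing() above, with illustrative figures: a 32 KiB thread data cache with CACHE_ASSOCIATIVITY of 4 gives a way size of 32768 / 4 = 8192 bytes; with 4 KiB pages that exceeds PAGE_SIZE, so two virtual aliases of the same physical page can occupy different cache sets and the warning fires. A 16 KiB thread cache (way size 4096 bytes) would pass the check.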
diff --git a/arch/metag/kernel/clock.c b/arch/metag/kernel/clock.c
new file mode 100644 (file)
index 0000000..defc840
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * arch/metag/kernel/clock.c
+ *
+ * Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+
+#include <asm/param.h>
+#include <asm/clock.h>
+
+struct meta_clock_desc _meta_clock;
+
+/* Default machine get_core_freq callback. */
+static unsigned long get_core_freq_default(void)
+{
+#ifdef CONFIG_METAG_META21
+       /*
+        * Meta 2 cores divide down the core clock for the Meta timers, so we
+        * can estimate the core clock from the divider.
+        */
+       return (metag_in32(EXPAND_TIMER_DIV) + 1) * 1000000;
+#else
+       /*
+        * On Meta 1 we don't know the core clock, but assuming the Meta timer
+        * is correct it can be estimated based on loops_per_jiffy.
+        */
+       return (loops_per_jiffy * HZ * 5) >> 1;
+#endif
+}
+
+/**
+ * setup_meta_clocks() - Set up the Meta clock.
+ * @desc:      Clock descriptor usually provided by machine description
+ *
+ * Ensures all callbacks are valid.
+ */
+void __init setup_meta_clocks(struct meta_clock_desc *desc)
+{
+       /* copy callbacks */
+       if (desc)
+               _meta_clock = *desc;
+
+       /* set fallback functions */
+       if (!_meta_clock.get_core_freq)
+               _meta_clock.get_core_freq = get_core_freq_default;
+}
+
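A worked example of the Meta 1 fallback in get_core_freq_default() above (numbers are illustrative): (loops_per_jiffy * HZ * 5) >> 1 equals loops_per_jiffy * HZ * 2.5, i.e. it assumes roughly 2.5 core cycles per delay-loop iteration. With loops_per_jiffy = 100000 and HZ = 100 the estimate is (100000 * 100 * 5) >> 1 = 25,000,000, i.e. about 25 MHz.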
diff --git a/arch/metag/kernel/core_reg.c b/arch/metag/kernel/core_reg.c
new file mode 100644 (file)
index 0000000..671cce8
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ *  Support for reading and writing Meta core internal registers.
+ *
+ *  Copyright (C) 2011 Imagination Technologies Ltd.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/export.h>
+
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/hwthread.h>
+#include <asm/io.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+
+#define UNIT_BIT_MASK          TXUXXRXRQ_UXX_BITS
+#define REG_BIT_MASK           TXUXXRXRQ_RX_BITS
+#define THREAD_BIT_MASK                TXUXXRXRQ_TX_BITS
+
+#define UNIT_SHIFTS            TXUXXRXRQ_UXX_S
+#define REG_SHIFTS             TXUXXRXRQ_RX_S
+#define THREAD_SHIFTS          TXUXXRXRQ_TX_S
+
+#define UNIT_VAL(x)            (((x) << UNIT_SHIFTS) & UNIT_BIT_MASK)
+#define REG_VAL(x)             (((x) << REG_SHIFTS) & REG_BIT_MASK)
+#define THREAD_VAL(x)          (((x) << THREAD_SHIFTS) & THREAD_BIT_MASK)
+
+/**
+ * core_reg_write() - modify the content of a register in a core unit.
+ * @unit:      The unit to be modified.
+ * @reg:       Register number within the unit.
+ * @thread:    The thread we want to access.
+ * @val:       The new value to write.
+ *
+ * Check asm/metag_regs.h for a list/defines of supported units (ie: TXUPC_ID,
+ * TXUTR_ID, etc), and regnums within the units (ie: TXMASKI_REGNUM,
+ * TXPOLLI_REGNUM, etc).
+ */
+void core_reg_write(int unit, int reg, int thread, unsigned int val)
+{
+       unsigned long flags;
+
+       /* TXUCT_ID has its own memory mapped registers */
+       if (unit == TXUCT_ID) {
+               void __iomem *cu_reg = __CU_addr(thread, reg);
+               metag_out32(val, cu_reg);
+               return;
+       }
+
+       __global_lock2(flags);
+
+       /* wait for ready */
+       while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+               udelay(10);
+
+       /* set the value to write */
+       metag_out32(val, TXUXXRXDT);
+
+       /* set the register to write */
+       val = UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread);
+       metag_out32(val, TXUXXRXRQ);
+
+       /* wait for finish */
+       while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+               udelay(10);
+
+       __global_unlock2(flags);
+}
+EXPORT_SYMBOL(core_reg_write);
+
+/**
+ * core_reg_read() - read the content of a register in a core unit.
+ * @unit:      The unit to be read from.
+ * @reg:       Register number within the unit.
+ * @thread:    The thread we want to access.
+ *
+ * Check asm/metag_regs.h for a list/defines of supported units (ie: TXUPC_ID,
+ * TXUTR_ID, etc), and regnums within the units (ie: TXMASKI_REGNUM,
+ * TXPOLLI_REGNUM, etc).
+ */
+unsigned int core_reg_read(int unit, int reg, int thread)
+{
+       unsigned long flags;
+       unsigned int val;
+
+       /* TXUCT_ID has its own memory mapped registers */
+       if (unit == TXUCT_ID) {
+               void __iomem *cu_reg = __CU_addr(thread, reg);
+               val = metag_in32(cu_reg);
+               return val;
+       }
+
+       __global_lock2(flags);
+
+       /* wait for ready */
+       while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+               udelay(10);
+
+       /* set the register to read */
+       val = (UNIT_VAL(unit) | REG_VAL(reg) | THREAD_VAL(thread) |
+                                                       TXUXXRXRQ_RDnWR_BIT);
+       metag_out32(val, TXUXXRXRQ);
+
+       /* wait for finish */
+       while (!(metag_in32(TXUXXRXRQ) & TXUXXRXRQ_DREADY_BIT))
+               udelay(10);
+
+       /* read the register value */
+       val = metag_in32(TXUXXRXDT);
+
+       __global_unlock2(flags);
+
+       return val;
+}
+EXPORT_SYMBOL(core_reg_read);
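A minimal sketch of a caller (illustrative, not part of this commit; the unit and register constants come from asm/metag_regs.h as the comments above note):

/* Illustrative only: read a unit register of another thread, then write it back. */
unsigned int val;

val = core_reg_read(TXUPC_ID, 0, thread);       /* register 0 of the TXUPC unit */
core_reg_write(TXUPC_ID, 0, thread, val);       /* write the same value back */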
diff --git a/arch/metag/kernel/da.c b/arch/metag/kernel/da.c
new file mode 100644 (file)
index 0000000..52aabb6
--- /dev/null
@@ -0,0 +1,23 @@
+/*
+ * Meta DA JTAG debugger control.
+ *
+ * Copyright 2012 Imagination Technologies Ltd.
+ */
+
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <asm/da.h>
+#include <asm/metag_mem.h>
+
+bool _metag_da_present;
+
+int __init metag_da_probe(void)
+{
+       _metag_da_present = (metag_in32(T0VECINT_BHALT) == 1);
+       if (_metag_da_present)
+               pr_info("DA present\n");
+       else
+               pr_info("DA not present\n");
+       return 0;
+}
diff --git a/arch/metag/kernel/devtree.c b/arch/metag/kernel/devtree.c
new file mode 100644 (file)
index 0000000..7cd0252
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ *  linux/arch/metag/kernel/devtree.c
+ *
+ *  Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ *  Based on ARM version:
+ *  Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/mach/arch.h>
+
+void __init early_init_dt_add_memory_arch(u64 base, u64 size)
+{
+       pr_err("%s(%llx, %llx)\n",
+              __func__, base, size);
+}
+
+void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
+{
+       return alloc_bootmem_align(size, align);
+}
+
+/**
+ * setup_machine_fdt - Machine setup when a dtb was passed to the kernel
+ * @dt:                virtual address pointer to dt blob
+ *
+ * If a dtb was passed to the kernel, then use it to choose the correct
+ * machine_desc and to setup the system.
+ */
+struct machine_desc * __init setup_machine_fdt(void *dt)
+{
+       struct boot_param_header *devtree = dt;
+       struct machine_desc *mdesc, *mdesc_best = NULL;
+       unsigned int score, mdesc_score = ~1;
+       unsigned long dt_root;
+       const char *model;
+
+       /* check device tree validity */
+       if (be32_to_cpu(devtree->magic) != OF_DT_HEADER)
+               return NULL;
+
+       /* Search the mdescs for the 'best' compatible value match */
+       initial_boot_params = devtree;
+       dt_root = of_get_flat_dt_root();
+
+       for_each_machine_desc(mdesc) {
+               score = of_flat_dt_match(dt_root, mdesc->dt_compat);
+               if (score > 0 && score < mdesc_score) {
+                       mdesc_best = mdesc;
+                       mdesc_score = score;
+               }
+       }
+       if (!mdesc_best) {
+               const char *prop;
+               long size;
+
+               pr_err("\nError: unrecognized/unsupported device tree compatible list:\n[ ");
+
+               prop = of_get_flat_dt_prop(dt_root, "compatible", &size);
+               if (prop) {
+                       while (size > 0) {
+                               printk("'%s' ", prop);
+                               size -= strlen(prop) + 1;
+                               prop += strlen(prop) + 1;
+                       }
+               }
+               printk("]\n\n");
+
+               dump_machine_table(); /* does not return */
+       }
+
+       model = of_get_flat_dt_prop(dt_root, "model", NULL);
+       if (!model)
+               model = of_get_flat_dt_prop(dt_root, "compatible", NULL);
+       if (!model)
+               model = "<unknown>";
+       pr_info("Machine: %s, model: %s\n", mdesc_best->name, model);
+
+       /* Retrieve various information from the /chosen node */
+       of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line);
+
+       return mdesc_best;
+}
+
+/**
+ * copy_fdt - Copy device tree into non-init memory.
+ *
+ * We must copy the flattened device tree blob into non-init memory because the
+ * unflattened device tree will reference the strings in it directly.
+ */
+void __init copy_fdt(void)
+{
+       void *alloc = early_init_dt_alloc_memory_arch(
+                       be32_to_cpu(initial_boot_params->totalsize), 0x40);
+       if (alloc) {
+               memcpy(alloc, initial_boot_params,
+                      be32_to_cpu(initial_boot_params->totalsize));
+               initial_boot_params = alloc;
+       }
+}
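Note on the matching loop in setup_machine_fdt() above: of_flat_dt_match() returns 0 for no match and otherwise a score where a lower value means a closer match (a hit on an earlier entry of the root node's compatible list), so the loop keeps the machine_desc with the smallest non-zero score. For example, with a root compatible list of "vendor,board", "vendor,soc" (names illustrative), an mdesc matching "vendor,board" scores better than one that only matches "vendor,soc" and is chosen.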
diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c
new file mode 100644 (file)
index 0000000..8c00ded
--- /dev/null
@@ -0,0 +1,507 @@
+/*
+ *  Meta version derived from arch/powerpc/lib/dma-noncoherent.c
+ *    Copyright (C) 2008 Imagination Technologies Ltd.
+ *
+ *  PowerPC version derived from arch/arm/mm/consistent.c
+ *    Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
+ *
+ *  Copyright (C) 2000 Russell King
+ *
+ * Consistent memory allocators.  Used for DMA devices that want to
+ * share uncached memory with the processor core.  The function return
+ * is the virtual address and 'dma_handle' is the physical address.
+ * Mostly stolen from the ARM port, with some changes for PowerPC.
+ *                                             -- Dan
+ *
+ * Reorganized to get rid of the arch-specific consistent_* functions
+ * and provide non-coherent implementations for the DMA API. -Matt
+ *
+ * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
+ * implementation. This is pulled straight from ARM and barely
+ * modified. -Matt
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+
+#define CONSISTENT_OFFSET(x)   (((unsigned long)(x) - CONSISTENT_START) \
+                                       >> PAGE_SHIFT)
+
+static u64 get_coherent_dma_mask(struct device *dev)
+{
+       u64 mask = ~0ULL;
+
+       if (dev) {
+               mask = dev->coherent_dma_mask;
+
+               /*
+                * Sanity check the DMA mask - it must be non-zero, and
+                * must be able to be satisfied by a DMA allocation.
+                */
+               if (mask == 0) {
+                       dev_warn(dev, "coherent DMA mask is unset\n");
+                       return 0;
+               }
+       }
+
+       return mask;
+}
+/*
+ * This is the page table (2MB) covering uncached, DMA consistent allocations
+ */
+static pte_t *consistent_pte;
+static DEFINE_SPINLOCK(consistent_lock);
+
+/*
+ * VM region handling support.
+ *
+ * This should become something generic, handling VM region allocations for
+ * vmalloc and similar (ioremap, module space, etc).
+ *
+ * I envisage vmalloc()'s supporting vm_struct becoming:
+ *
+ *  struct vm_struct {
+ *    struct metag_vm_region   region;
+ *    unsigned long    flags;
+ *    struct page      **pages;
+ *    unsigned int     nr_pages;
+ *    unsigned long    phys_addr;
+ *  };
+ *
+ * get_vm_area() would then call metag_vm_region_alloc with an appropriate
+ * struct metag_vm_region head (eg):
+ *
+ *  struct metag_vm_region vmalloc_head = {
+ *     .vm_list        = LIST_HEAD_INIT(vmalloc_head.vm_list),
+ *     .vm_start       = VMALLOC_START,
+ *     .vm_end         = VMALLOC_END,
+ *  };
+ *
+ * However, vmalloc_head.vm_start is variable (typically, it is dependent on
+ * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
+ * would have to initialise this each time prior to calling
+ * metag_vm_region_alloc().
+ */
+struct metag_vm_region {
+       struct list_head vm_list;
+       unsigned long vm_start;
+       unsigned long vm_end;
+       struct page             *vm_pages;
+       int                     vm_active;
+};
+
+static struct metag_vm_region consistent_head = {
+       .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
+       .vm_start = CONSISTENT_START,
+       .vm_end = CONSISTENT_END,
+};
+
+static struct metag_vm_region *metag_vm_region_alloc(struct metag_vm_region
+                                                    *head, size_t size,
+                                                    gfp_t gfp)
+{
+       unsigned long addr = head->vm_start, end = head->vm_end - size;
+       unsigned long flags;
+       struct metag_vm_region *c, *new;
+
+       new = kmalloc(sizeof(struct metag_vm_region), gfp);
+       if (!new)
+               goto out;
+
+       spin_lock_irqsave(&consistent_lock, flags);
+
+       list_for_each_entry(c, &head->vm_list, vm_list) {
+               if ((addr + size) < addr)
+                       goto nospc;
+               if ((addr + size) <= c->vm_start)
+                       goto found;
+               addr = c->vm_end;
+               if (addr > end)
+                       goto nospc;
+       }
+
+found:
+       /*
+        * Insert this entry _before_ the one we found.
+        */
+       list_add_tail(&new->vm_list, &c->vm_list);
+       new->vm_start = addr;
+       new->vm_end = addr + size;
+       new->vm_active = 1;
+
+       spin_unlock_irqrestore(&consistent_lock, flags);
+       return new;
+
+nospc:
+       spin_unlock_irqrestore(&consistent_lock, flags);
+       kfree(new);
+out:
+       return NULL;
+}
+
+static struct metag_vm_region *metag_vm_region_find(struct metag_vm_region
+                                                   *head, unsigned long addr)
+{
+       struct metag_vm_region *c;
+
+       list_for_each_entry(c, &head->vm_list, vm_list) {
+               if (c->vm_active && c->vm_start == addr)
+                       goto out;
+       }
+       c = NULL;
+out:
+       return c;
+}
+
+/*
+ * Allocate DMA-coherent memory space and return both the kernel remapped
+ * virtual and bus address for that space.
+ */
+void *dma_alloc_coherent(struct device *dev, size_t size,
+                        dma_addr_t *handle, gfp_t gfp)
+{
+       struct page *page;
+       struct metag_vm_region *c;
+       unsigned long order;
+       u64 mask = get_coherent_dma_mask(dev);
+       u64 limit;
+
+       if (!consistent_pte) {
+               pr_err("%s: not initialised\n", __func__);
+               dump_stack();
+               return NULL;
+       }
+
+       if (!mask)
+               goto no_page;
+       size = PAGE_ALIGN(size);
+       limit = (mask + 1) & ~mask;
+       if ((limit && size >= limit)
+           || size >= (CONSISTENT_END - CONSISTENT_START)) {
+               pr_warn("coherent allocation too big (requested %#x mask %#Lx)\n",
+                       size, mask);
+               return NULL;
+       }
+
+       order = get_order(size);
+
+       if (mask != 0xffffffff)
+               gfp |= GFP_DMA;
+
+       page = alloc_pages(gfp, order);
+       if (!page)
+               goto no_page;
+
+       /*
+        * Invalidate any data that might be lurking in the
+        * kernel direct-mapped region for device DMA.
+        */
+       {
+               void *kaddr = page_address(page);
+               memset(kaddr, 0, size);
+               flush_dcache_region(kaddr, size);
+       }
+
+       /*
+        * Allocate a virtual address in the consistent mapping region.
+        */
+       c = metag_vm_region_alloc(&consistent_head, size,
+                                 gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
+       if (c) {
+               unsigned long vaddr = c->vm_start;
+               pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
+               struct page *end = page + (1 << order);
+
+               c->vm_pages = page;
+               split_page(page, order);
+
+               /*
+                * Set the "dma handle"
+                */
+               *handle = page_to_bus(page);
+
+               do {
+                       BUG_ON(!pte_none(*pte));
+
+                       SetPageReserved(page);
+                       set_pte_at(&init_mm, vaddr,
+                                  pte, mk_pte(page,
+                                              pgprot_writecombine
+                                              (PAGE_KERNEL)));
+                       page++;
+                       pte++;
+                       vaddr += PAGE_SIZE;
+               } while (size -= PAGE_SIZE);
+
+               /*
+                * Free the otherwise unused pages.
+                */
+               while (page < end) {
+                       __free_page(page);
+                       page++;
+               }
+
+               return (void *)c->vm_start;
+       }
+
+       if (page)
+               __free_pages(page, order);
+no_page:
+       return NULL;
+}
+EXPORT_SYMBOL(dma_alloc_coherent);
+
+/*
+ * free a page as defined by the above mapping.
+ */
+void dma_free_coherent(struct device *dev, size_t size,
+                      void *vaddr, dma_addr_t dma_handle)
+{
+       struct metag_vm_region *c;
+       unsigned long flags, addr;
+       pte_t *ptep;
+
+       size = PAGE_ALIGN(size);
+
+       spin_lock_irqsave(&consistent_lock, flags);
+
+       c = metag_vm_region_find(&consistent_head, (unsigned long)vaddr);
+       if (!c)
+               goto no_area;
+
+       c->vm_active = 0;
+       if ((c->vm_end - c->vm_start) != size) {
+               pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
+                      __func__, c->vm_end - c->vm_start, size);
+               dump_stack();
+               size = c->vm_end - c->vm_start;
+       }
+
+       ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+       addr = c->vm_start;
+       do {
+               pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
+               unsigned long pfn;
+
+               ptep++;
+               addr += PAGE_SIZE;
+
+               if (!pte_none(pte) && pte_present(pte)) {
+                       pfn = pte_pfn(pte);
+
+                       if (pfn_valid(pfn)) {
+                               struct page *page = pfn_to_page(pfn);
+                               ClearPageReserved(page);
+
+                               __free_page(page);
+                               continue;
+                       }
+               }
+
+               pr_crit("%s: bad page in kernel page table\n",
+                       __func__);
+       } while (size -= PAGE_SIZE);
+
+       flush_tlb_kernel_range(c->vm_start, c->vm_end);
+
+       list_del(&c->vm_list);
+
+       spin_unlock_irqrestore(&consistent_lock, flags);
+
+       kfree(c);
+       return;
+
+no_area:
+       spin_unlock_irqrestore(&consistent_lock, flags);
+       pr_err("%s: trying to free invalid coherent area: %p\n",
+              __func__, vaddr);
+       dump_stack();
+}
+EXPORT_SYMBOL(dma_free_coherent);
+
+
+static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
+                   void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+       int ret = -ENXIO;
+
+       unsigned long flags, user_size, kern_size;
+       struct metag_vm_region *c;
+
+       user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
+       spin_lock_irqsave(&consistent_lock, flags);
+       c = metag_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
+       spin_unlock_irqrestore(&consistent_lock, flags);
+
+       if (c) {
+               unsigned long off = vma->vm_pgoff;
+
+               kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;
+
+               if (off < kern_size &&
+                   user_size <= (kern_size - off)) {
+                       ret = remap_pfn_range(vma, vma->vm_start,
+                                             page_to_pfn(c->vm_pages) + off,
+                                             user_size << PAGE_SHIFT,
+                                             vma->vm_page_prot);
+               }
+       }
+
+
+       return ret;
+}
+
+int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
+                     void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+       return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL(dma_mmap_coherent);
+
+int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma,
+                         void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+       vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+       return dma_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+EXPORT_SYMBOL(dma_mmap_writecombine);
+
+
+
+
+/*
+ * Initialise the consistent memory allocation.
+ */
+static int __init dma_alloc_init(void)
+{
+       pgd_t *pgd, *pgd_k;
+       pud_t *pud, *pud_k;
+       pmd_t *pmd, *pmd_k;
+       pte_t *pte;
+       int ret = 0;
+
+       do {
+               int offset = pgd_index(CONSISTENT_START);
+               pgd = pgd_offset(&init_mm, CONSISTENT_START);
+               pud = pud_alloc(&init_mm, pgd, CONSISTENT_START);
+               pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START);
+               if (!pmd) {
+                       pr_err("%s: no pmd tables\n", __func__);
+                       ret = -ENOMEM;
+                       break;
+               }
+               WARN_ON(!pmd_none(*pmd));
+
+               pte = pte_alloc_kernel(pmd, CONSISTENT_START);
+               if (!pte) {
+                       pr_err("%s: no pte tables\n", __func__);
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               pgd_k = ((pgd_t *) mmu_get_base()) + offset;
+               pud_k = pud_offset(pgd_k, CONSISTENT_START);
+               pmd_k = pmd_offset(pud_k, CONSISTENT_START);
+               set_pmd(pmd_k, *pmd);
+
+               consistent_pte = pte;
+       } while (0);
+
+       return ret;
+}
+early_initcall(dma_alloc_init);
+
+/*
+ * make an area consistent to devices.
+ */
+void dma_sync_for_device(void *vaddr, size_t size, int dma_direction)
+{
+       /*
+        * Ensure any writes get through the write combiner. This is necessary
+        * even with DMA_FROM_DEVICE, or the write may dirty the cache after
+        * we've invalidated it and get written back during the DMA.
+        */
+
+       barrier();
+
+       switch (dma_direction) {
+       case DMA_BIDIRECTIONAL:
+               /*
+                * Writeback to ensure the device can see our latest changes and
+                * so that we have no dirty lines, and invalidate the cache
+                * lines too in preparation for receiving the buffer back
+                * (dma_sync_for_cpu) later.
+                */
+               flush_dcache_region(vaddr, size);
+               break;
+       case DMA_TO_DEVICE:
+               /*
+                * Writeback to ensure the device can see our latest changes.
+                * There's no need to invalidate as the device shouldn't write
+                * to the buffer.
+                */
+               writeback_dcache_region(vaddr, size);
+               break;
+       case DMA_FROM_DEVICE:
+               /*
+                * Invalidate to ensure we have no dirty lines that could get
+                * written back during the DMA. It's also safe to flush
+                * (writeback) here if necessary.
+                */
+               invalidate_dcache_region(vaddr, size);
+               break;
+       case DMA_NONE:
+               BUG();
+       }
+
+       wmb();
+}
+EXPORT_SYMBOL(dma_sync_for_device);
+
+/*
+ * make an area consistent to the core.
+ */
+void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction)
+{
+       /*
+        * Hardware L2 cache prefetch doesn't occur across 4K physical
+        * boundaries; however, according to Documentation/DMA-API-HOWTO.txt,
+        * kmalloc'd memory is DMA'able, so accesses to nearby memory could
+        * trigger a cache fill in the DMA buffer.
+        *
+        * This should never cause dirty lines, so a flush or invalidate should
+        * be safe to allow us to see data from the device.
+        */
+       if (_meta_l2c_pf_is_enabled()) {
+               switch (dma_direction) {
+               case DMA_BIDIRECTIONAL:
+               case DMA_FROM_DEVICE:
+                       invalidate_dcache_region(vaddr, size);
+                       break;
+               case DMA_TO_DEVICE:
+                       /* The device shouldn't have written to the buffer */
+                       break;
+               case DMA_NONE:
+                       BUG();
+               }
+       }
+
+       rmb();
+}
+EXPORT_SYMBOL(dma_sync_for_cpu);
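A minimal sketch of a driver using the coherent allocator implemented above (illustrative only; dev and the buffer size are placeholders):

/* Illustrative only: allocate, use and release a coherent DMA buffer. */
dma_addr_t dma_handle;
void *cpu_addr;

cpu_addr = dma_alloc_coherent(dev, PAGE_SIZE, &dma_handle, GFP_KERNEL);
if (!cpu_addr)
        return -ENOMEM;

/* ... hand dma_handle to the device, touch the buffer through cpu_addr ... */

dma_free_coherent(dev, PAGE_SIZE, cpu_addr, dma_handle);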
diff --git a/arch/metag/kernel/ftrace.c b/arch/metag/kernel/ftrace.c
new file mode 100644 (file)
index 0000000..a774f32
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ * Licensed under the GPL
+ *
+ * Dynamic ftrace support.
+ */
+
+#include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
+
+#define D04_MOVT_TEMPLATE      0x02200005
+#define D04_CALL_TEMPLATE      0xAC200005
+#define D1RTP_MOVT_TEMPLATE    0x03200005
+#define D1RTP_CALL_TEMPLATE    0xAC200006
+
+static const unsigned long NOP[2] = {0xa0fffffe, 0xa0fffffe};
+static unsigned long movt_and_call_insn[2];
+
+static unsigned char *ftrace_nop_replace(void)
+{
+       return (char *)&NOP[0];
+}
+
+static unsigned char *ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+       unsigned long hi16, low16;
+
+       hi16 = (addr & 0xffff0000) >> 13;
+       low16 = (addr & 0x0000ffff) << 3;
+
+       /*
+        * The compiler makes the call to mcount_wrapper()
+        * (Meta's wrapper around mcount()) through the register
+        * D0.4. So whenever we're patching one of those compiler-generated
+        * calls we also need to go through D0.4. Otherwise use D1RtP.
+        */
+       if (pc == (unsigned long)&ftrace_call) {
+               writel(D1RTP_MOVT_TEMPLATE | hi16, &movt_and_call_insn[0]);
+               writel(D1RTP_CALL_TEMPLATE | low16, &movt_and_call_insn[1]);
+       } else {
+               writel(D04_MOVT_TEMPLATE | hi16, &movt_and_call_insn[0]);
+               writel(D04_CALL_TEMPLATE | low16, &movt_and_call_insn[1]);
+       }
+
+       return (unsigned char *)&movt_and_call_insn[0];
+}
+
+static int ftrace_modify_code(unsigned long pc, unsigned char *old_code,
+                             unsigned char *new_code)
+{
+       unsigned char replaced[MCOUNT_INSN_SIZE];
+
+       /*
+        * Note: Due to modules and __init, code can
+        *  disappear and change, we need to protect against faulting
+        *  as well as code changing.
+        *
+        * No real locking needed, this code is run through
+        * kstop_machine.
+        */
+
+       /* read the text we want to modify */
+       if (probe_kernel_read(replaced, (void *)pc, MCOUNT_INSN_SIZE))
+               return -EFAULT;
+
+       /* Make sure it is what we expect it to be */
+       if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0)
+               return -EINVAL;
+
+       /* replace the text with the new text */
+       if (probe_kernel_write((void *)pc, new_code, MCOUNT_INSN_SIZE))
+               return -EPERM;
+
+       flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+
+       return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+       int ret;
+       unsigned long pc;
+       unsigned char old[MCOUNT_INSN_SIZE], *new;
+
+       pc = (unsigned long)&ftrace_call;
+       memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
+       new = ftrace_call_replace(pc, (unsigned long)func);
+       ret = ftrace_modify_code(pc, old, new);
+
+       return ret;
+}
+
+int ftrace_make_nop(struct module *mod,
+                   struct dyn_ftrace *rec, unsigned long addr)
+{
+       unsigned char *new, *old;
+       unsigned long ip = rec->ip;
+
+       old = ftrace_call_replace(ip, addr);
+       new = ftrace_nop_replace();
+
+       return ftrace_modify_code(ip, old, new);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+       unsigned char *new, *old;
+       unsigned long ip = rec->ip;
+
+       old = ftrace_nop_replace();
+       new = ftrace_call_replace(ip, addr);
+
+       return ftrace_modify_code(ip, old, new);
+}
+
+/* run from kstop_machine */
+int __init ftrace_dyn_arch_init(void *data)
+{
+       /* The return code is returned via data */
+       writel(0, data);
+
+       return 0;
+}
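A worked example of the immediate encoding in ftrace_call_replace() above (the target address is illustrative): for addr = 0x40001234, hi16 = (addr & 0xffff0000) >> 13 = 0x20000 and low16 = (addr & 0x0000ffff) << 3 = 0x91a0, so the patched pair becomes D04_MOVT_TEMPLATE | 0x20000 followed by D04_CALL_TEMPLATE | 0x91a0, i.e. a MOVT/CALL sequence that rebuilds the 32-bit call target from two 16-bit halves shifted into the instructions' immediate fields.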
diff --git a/arch/metag/kernel/ftrace_stub.S b/arch/metag/kernel/ftrace_stub.S
new file mode 100644 (file)
index 0000000..e70bff7
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2008 Imagination Technologies Ltd.
+ * Licensed under the GPL
+ *
+ */
+
+#include <asm/ftrace.h>
+
+       .text
+#ifdef CONFIG_DYNAMIC_FTRACE
+       .global _mcount_wrapper
+       .type   _mcount_wrapper,function
+_mcount_wrapper:
+       MOV     PC,D0.4
+
+       .global _ftrace_caller
+       .type   _ftrace_caller,function
+_ftrace_caller:
+       MOVT    D0Re0,#HI(_function_trace_stop)
+       ADD     D0Re0,D0Re0,#LO(_function_trace_stop)
+       GETD    D0Re0,[D0Re0]
+       CMP     D0Re0,#0
+       BEQ     $Lcall_stub
+       MOV     PC,D0.4
+$Lcall_stub:
+       MSETL   [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4
+       MOV     D1Ar1, D0.4
+       MOV     D0Ar2, D1RtP
+       SUB     D1Ar1,D1Ar1,#MCOUNT_INSN_SIZE
+
+       .global _ftrace_call
+_ftrace_call:
+       MOVT    D1RtP,#HI(_ftrace_stub)
+       CALL    D1RtP,#LO(_ftrace_stub)
+       GETL    D0.4,  D1RtP, [A0StP++#(-8)]
+       GETL    D0Ar2, D1Ar1, [A0StP++#(-8)]
+       GETL    D0Ar4, D1Ar3, [A0StP++#(-8)]
+       GETL    D0Ar6, D1Ar5, [A0StP++#(-8)]
+       MOV     PC, D0.4
+#else
+
+       .global _mcount_wrapper
+       .type   _mcount_wrapper,function
+_mcount_wrapper:
+       MOVT    D0Re0,#HI(_function_trace_stop)
+       ADD     D0Re0,D0Re0,#LO(_function_trace_stop)
+       GETD    D0Re0,[D0Re0]
+       CMP     D0Re0,#0
+       BEQ     $Lcall_mcount
+       MOV     PC,D0.4
+$Lcall_mcount:
+       MSETL   [A0StP], D0Ar6, D0Ar4, D0Ar2, D0.4
+       MOV     D1Ar1, D0.4
+       MOV     D0Ar2, D1RtP
+       MOVT    D0Re0,#HI(_ftrace_trace_function)
+       ADD     D0Re0,D0Re0,#LO(_ftrace_trace_function)
+       GET     D1Ar3,[D0Re0]
+       MOVT    D1Re0,#HI(_ftrace_stub)
+       ADD     D1Re0,D1Re0,#LO(_ftrace_stub)
+       CMP     D1Ar3,D1Re0
+       BEQ     $Ltrace_exit
+       MOV     D1RtP,D1Ar3
+       SUB     D1Ar1,D1Ar1,#MCOUNT_INSN_SIZE
+       SWAP    PC,D1RtP
+$Ltrace_exit:
+       GETL    D0.4,  D1RtP, [A0StP++#(-8)]
+       GETL    D0Ar2, D1Ar1, [A0StP++#(-8)]
+       GETL    D0Ar4, D1Ar3, [A0StP++#(-8)]
+       GETL    D0Ar6, D1Ar5, [A0StP++#(-8)]
+       MOV     PC, D0.4
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+       .global _ftrace_stub
+_ftrace_stub:
+       MOV     PC,D1RtP
diff --git a/arch/metag/kernel/head.S b/arch/metag/kernel/head.S
new file mode 100644 (file)
index 0000000..969dffa
--- /dev/null
@@ -0,0 +1,57 @@
+       ! Copyright 2005,2006,2007,2009 Imagination Technologies
+
+#include <linux/init.h>
+#include <generated/asm-offsets.h>
+#undef __exit
+
+       __HEAD
+       ! Setup the stack and get going into _metag_start_kernel
+       .global __start
+       .type   __start,function
+__start:
+       ! D1Ar1 contains pTBI (ISTAT)
+       ! D0Ar2 contains pTBI
+       ! D1Ar3 contains __pTBISegs
+       ! D0Ar4 contains kernel arglist pointer
+
+       MOVT    D0Re0,#HI(___pTBIs)
+       ADD     D0Re0,D0Re0,#LO(___pTBIs)
+       SETL    [D0Re0],D0Ar2,D1Ar1
+       MOVT    D0Re0,#HI(___pTBISegs)
+       ADD     D0Re0,D0Re0,#LO(___pTBISegs)
+       SETD    [D0Re0],D1Ar3
+       MOV     A0FrP,#0
+       MOV     D0Re0,#0
+       MOV     D1Re0,#0
+       MOV     D1Ar3,#0
+       MOV     D1Ar1,D0Ar4                     !Store kernel boot params
+       MOV     D1Ar5,#0
+       MOV     D0Ar6,#0
+#ifdef CONFIG_METAG_DSP
+       MOV     D0.8,#0
+#endif
+       MOVT    A0StP,#HI(_init_thread_union)
+       ADD     A0StP,A0StP,#LO(_init_thread_union)
+       ADD     A0StP,A0StP,#THREAD_INFO_SIZE
+       MOVT    D1RtP,#HI(_metag_start_kernel)
+       CALL    D1RtP,#LO(_metag_start_kernel)
+       .size   __start,.-__start
+
+       !! Needed by TBX
+       .global __exit
+       .type   __exit,function
+__exit:
+       XOR     TXENABLE,D0Re0,D0Re0
+       .size   __exit,.-__exit
+
+#ifdef CONFIG_SMP
+       .global _secondary_startup
+       .type _secondary_startup,function
+_secondary_startup:
+       MOVT    A0StP,#HI(_secondary_data_stack)
+       ADD     A0StP,A0StP,#LO(_secondary_data_stack)
+       GETD    A0StP,[A0StP]
+       ADD     A0StP,A0StP,#THREAD_INFO_SIZE
+       B       _secondary_start_kernel
+       .size   _secondary_startup,.-_secondary_startup
+#endif
diff --git a/arch/metag/kernel/irq.c b/arch/metag/kernel/irq.c
new file mode 100644 (file)
index 0000000..87707ef
--- /dev/null
@@ -0,0 +1,323 @@
+/*
+ * Linux/Meta general interrupt handling code
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/irqchip/metag-ext.h>
+#include <linux/irqchip/metag.h>
+#include <linux/irqdomain.h>
+#include <linux/ratelimit.h>
+
+#include <asm/core_reg.h>
+#include <asm/mach/arch.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_4KSTACKS
+union irq_ctx {
+       struct thread_info      tinfo;
+       u32                     stack[THREAD_SIZE/sizeof(u32)];
+};
+
+static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
+static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
+#endif
+
+struct irq_domain *root_domain;
+
+static unsigned int startup_meta_irq(struct irq_data *data)
+{
+       tbi_startup_interrupt(data->hwirq);
+       return 0;
+}
+
+static void shutdown_meta_irq(struct irq_data *data)
+{
+       tbi_shutdown_interrupt(data->hwirq);
+}
+
+void do_IRQ(int irq, struct pt_regs *regs)
+{
+       struct pt_regs *old_regs = set_irq_regs(regs);
+#ifdef CONFIG_4KSTACKS
+       struct irq_desc *desc;
+       union irq_ctx *curctx, *irqctx;
+       u32 *isp;
+#endif
+
+       irq_enter();
+
+       irq = irq_linear_revmap(root_domain, irq);
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+       /* Debugging check for stack overflow: is there less than 1KB free? */
+       {
+               unsigned long sp;
+
+               sp = __core_reg_get(A0StP);
+               sp &= THREAD_SIZE - 1;
+
+               if (unlikely(sp > (THREAD_SIZE - 1024)))
+                       pr_err("Stack overflow in do_IRQ: %ld\n", sp);
+       }
+#endif
+
+
+#ifdef CONFIG_4KSTACKS
+       curctx = (union irq_ctx *) current_thread_info();
+       irqctx = hardirq_ctx[smp_processor_id()];
+
+       /*
+        * This is where we switch to the IRQ stack. However, if we are
+        * already using the IRQ stack (because we interrupted a hardirq
+        * handler) we can't do that and just have to keep using the
+        * current stack (which is already the IRQ stack after all).
+        */
+       if (curctx != irqctx) {
+               /* build the stack frame on the IRQ stack */
+               isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info));
+               irqctx->tinfo.task = curctx->tinfo.task;
+
+               /*
+                * Copy the softirq bits in preempt_count so that the
+                * softirq checks work in the hardirq context.
+                */
+               irqctx->tinfo.preempt_count =
+                       (irqctx->tinfo.preempt_count & ~SOFTIRQ_MASK) |
+                       (curctx->tinfo.preempt_count & SOFTIRQ_MASK);
+
+               desc = irq_to_desc(irq);
+
+               asm volatile (
+                       "MOV   D0.5,%0\n"
+                       "MOV   D1Ar1,%1\n"
+                       "MOV   D1RtP,%2\n"
+                       "MOV   D0Ar2,%3\n"
+                       "SWAP  A0StP,D0.5\n"
+                       "SWAP  PC,D1RtP\n"
+                       "MOV   A0StP,D0.5\n"
+                       :
+                       : "r" (isp), "r" (irq), "r" (desc->handle_irq),
+                         "r" (desc)
+                       : "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
+                         "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
+                         "D0.5"
+                       );
+       } else
+#endif
+               generic_handle_irq(irq);
+
+       irq_exit();
+
+       set_irq_regs(old_regs);
+}
+
+#ifdef CONFIG_4KSTACKS
+
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+
+/*
+ * allocate per-cpu stacks for hardirq and for softirq processing
+ */
+void irq_ctx_init(int cpu)
+{
+       union irq_ctx *irqctx;
+
+       if (hardirq_ctx[cpu])
+               return;
+
+       irqctx = (union irq_ctx *) &hardirq_stack[cpu * THREAD_SIZE];
+       irqctx->tinfo.task              = NULL;
+       irqctx->tinfo.exec_domain       = NULL;
+       irqctx->tinfo.cpu               = cpu;
+       irqctx->tinfo.preempt_count     = HARDIRQ_OFFSET;
+       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+       hardirq_ctx[cpu] = irqctx;
+
+       irqctx = (union irq_ctx *) &softirq_stack[cpu * THREAD_SIZE];
+       irqctx->tinfo.task              = NULL;
+       irqctx->tinfo.exec_domain       = NULL;
+       irqctx->tinfo.cpu               = cpu;
+       irqctx->tinfo.preempt_count     = 0;
+       irqctx->tinfo.addr_limit        = MAKE_MM_SEG(0);
+
+       softirq_ctx[cpu] = irqctx;
+
+       pr_info("CPU %u irqstacks, hard=%p soft=%p\n",
+               cpu, hardirq_ctx[cpu], softirq_ctx[cpu]);
+}
+
+void irq_ctx_exit(int cpu)
+{
+       hardirq_ctx[smp_processor_id()] = NULL;
+}
+
+extern asmlinkage void __do_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+       unsigned long flags;
+       struct thread_info *curctx;
+       union irq_ctx *irqctx;
+       u32 *isp;
+
+       if (in_interrupt())
+               return;
+
+       local_irq_save(flags);
+
+       if (local_softirq_pending()) {
+               curctx = current_thread_info();
+               irqctx = softirq_ctx[smp_processor_id()];
+               irqctx->tinfo.task = curctx->task;
+
+               /* build the stack frame on the softirq stack */
+               isp = (u32 *) ((char *)irqctx + sizeof(struct thread_info));
+
+               asm volatile (
+                       "MOV   D0.5,%0\n"
+                       "SWAP  A0StP,D0.5\n"
+                       "CALLR D1RtP,___do_softirq\n"
+                       "MOV   A0StP,D0.5\n"
+                       :
+                       : "r" (isp)
+                       : "memory", "cc", "D1Ar1", "D0Ar2", "D1Ar3", "D0Ar4",
+                         "D1Ar5", "D0Ar6", "D0Re0", "D1Re0", "D0.4", "D1RtP",
+                         "D0.5"
+                       );
+               /*
+                * This shouldn't happen; we returned above if in_interrupt().
+                */
+               WARN_ON_ONCE(softirq_count());
+       }
+
+       local_irq_restore(flags);
+}
+#endif
+
+static struct irq_chip meta_irq_type = {
+       .name = "META-IRQ",
+       .irq_startup = startup_meta_irq,
+       .irq_shutdown = shutdown_meta_irq,
+};
+
+/**
+ * tbisig_map() - Map a TBI signal number to a virtual IRQ number.
+ * @hw:                Number of the TBI signal. Must be in range.
+ *
+ * Returns:    The virtual IRQ number corresponding to the TBI signal number
+ *             @hw.
+ */
+int tbisig_map(unsigned int hw)
+{
+       return irq_create_mapping(root_domain, hw);
+}
+
+/**
+ * metag_tbisig_map() - map a tbi signal to a Linux virtual IRQ number
+ * @d:         root irq domain
+ * @irq:       virtual irq number
+ * @hw:                hardware irq number (TBI signal number)
+ *
+ * This sets up a virtual irq for a specified TBI signal number.
+ */
+static int metag_tbisig_map(struct irq_domain *d, unsigned int irq,
+                           irq_hw_number_t hw)
+{
+#ifdef CONFIG_SMP
+       irq_set_chip_and_handler(irq, &meta_irq_type, handle_percpu_irq);
+#else
+       irq_set_chip_and_handler(irq, &meta_irq_type, handle_simple_irq);
+#endif
+       return 0;
+}
+
+static const struct irq_domain_ops metag_tbisig_domain_ops = {
+       .map = metag_tbisig_map,
+};
+
+/*
+ * void init_IRQ(void)
+ *
+ * Parameters: None
+ *
+ * Returns:    Nothing
+ *
+ * This function should be called during kernel startup to initialize
+ * the IRQ handling routines.
+ */
+void __init init_IRQ(void)
+{
+       root_domain = irq_domain_add_linear(NULL, 32,
+                                           &metag_tbisig_domain_ops, NULL);
+       if (unlikely(!root_domain))
+               panic("init_IRQ: cannot add root IRQ domain");
+
+       irq_ctx_init(smp_processor_id());
+
+       init_internal_IRQ();
+       init_external_IRQ();
+
+       if (machine_desc->init_irq)
+               machine_desc->init_irq();
+}
+
+int __init arch_probe_nr_irqs(void)
+{
+       if (machine_desc->nr_irqs)
+               nr_irqs = machine_desc->nr_irqs;
+       return 0;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void route_irq(struct irq_data *data, unsigned int irq, unsigned int cpu)
+{
+       struct irq_desc *desc = irq_to_desc(irq);
+       struct irq_chip *chip = irq_data_get_irq_chip(data);
+
+       raw_spin_lock_irq(&desc->lock);
+       if (chip->irq_set_affinity)
+               chip->irq_set_affinity(data, cpumask_of(cpu), false);
+       raw_spin_unlock_irq(&desc->lock);
+}
+
+/*
+ * The CPU has been marked offline.  Migrate IRQs off this CPU.  If
+ * the affinity settings do not allow other CPUs, force them onto any
+ * available CPU.
+ */
+void migrate_irqs(void)
+{
+       unsigned int i, cpu = smp_processor_id();
+       struct irq_desc *desc;
+
+       for_each_irq_desc(i, desc) {
+               struct irq_data *data = irq_desc_get_irq_data(desc);
+               unsigned int newcpu;
+
+               if (irqd_is_per_cpu(data))
+                       continue;
+
+               if (!cpumask_test_cpu(cpu, data->affinity))
+                       continue;
+
+               newcpu = cpumask_any_and(data->affinity, cpu_online_mask);
+
+               if (newcpu >= nr_cpu_ids) {
+                       pr_info_ratelimited("IRQ%u no longer affine to CPU%u\n",
+                                           i, cpu);
+
+                       cpumask_setall(data->affinity);
+                       newcpu = cpumask_any_and(data->affinity,
+                                                cpu_online_mask);
+               }
+
+               route_irq(data, i, newcpu);
+       }
+}
+#endif /* CONFIG_HOTPLUG_CPU */
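
The CONFIG_DEBUG_STACKOVERFLOW check in do_IRQ() above relies on the Meta stack pointer (A0StP) growing upwards within a THREAD_SIZE-aligned region, so the offset within that region is the amount of stack already consumed. A minimal standalone sketch of the same arithmetic, assuming an 8 KiB THREAD_SIZE purely for illustration:

    #include <stdio.h>

    #define THREAD_SIZE 8192        /* assumption for this example only */

    /* Returns non-zero when less than 1 KiB of the upward-growing stack
     * region remains, mirroring the do_IRQ() debug check above. */
    static int stack_nearly_full(unsigned long a0stp)
    {
            unsigned long used = a0stp & (THREAD_SIZE - 1);

            return used > (THREAD_SIZE - 1024);
    }

    int main(void)
    {
            printf("%d\n", stack_nearly_full(0x84001000));  /* 4 KiB used -> 0 */
            printf("%d\n", stack_nearly_full(0x84001f00));  /* ~7.75 KiB used -> 1 */
            return 0;
    }
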
diff --git a/arch/metag/kernel/kick.c b/arch/metag/kernel/kick.c
new file mode 100644 (file)
index 0000000..50fcbec
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ *  Copyright (C) 2009 Imagination Technologies
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * The Meta KICK interrupt mechanism is generally a useful feature, so
+ * we provide an interface for registering multiple interrupt
+ * handlers. All the registered interrupt handlers are "chained". When
+ * a KICK interrupt is received the first function in the list is
+ * called. If that interrupt handler cannot handle the KICK, the next
+ * one is called, and so on until one handles it (or we run out of
+ * functions). As soon as one function handles the interrupt, no
+ * further handlers are called.
+ *
+ * The only downside of chaining interrupt handlers is that each
+ * handler must be able to detect whether the KICK was intended for it
+ * or not.  For example, when the IPI handler runs and sees that
+ * there are no IPI messages, it must not signal that the KICK was
+ * handled, thereby giving the other handlers a chance to run.
+ *
+ * The reason that we provide our own interface for calling KICK
+ * handlers instead of using the generic kernel infrastructure is that
+ * the KICK handlers require access to a CPU's pTBI structure. So we
+ * pass it as an argument.
+ */
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+
+#include <asm/traps.h>
+
+/*
+ * All accesses/manipulations of kick_handlers_list should be
+ * performed while holding kick_handlers_lock.
+ */
+static DEFINE_SPINLOCK(kick_handlers_lock);
+static LIST_HEAD(kick_handlers_list);
+
+void kick_register_func(struct kick_irq_handler *kh)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&kick_handlers_lock, flags);
+
+       list_add_tail(&kh->list, &kick_handlers_list);
+
+       spin_unlock_irqrestore(&kick_handlers_lock, flags);
+}
+EXPORT_SYMBOL(kick_register_func);
+
+void kick_unregister_func(struct kick_irq_handler *kh)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&kick_handlers_lock, flags);
+
+       list_del(&kh->list);
+
+       spin_unlock_irqrestore(&kick_handlers_lock, flags);
+}
+EXPORT_SYMBOL(kick_unregister_func);
+
+TBIRES
+kick_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI)
+{
+       struct kick_irq_handler *kh;
+       struct list_head *lh;
+       int handled = 0;
+       TBIRES ret;
+
+       head_end(State, ~INTS_OFF_MASK);
+
+       /* If we interrupted user code handle any critical sections. */
+       if (State.Sig.SaveMask & TBICTX_PRIV_BIT)
+               restart_critical_section(State);
+
+       trace_hardirqs_off();
+
+       /*
+        * There is no need to disable interrupts here because we
+        * can't nest KICK interrupts in a KICK interrupt handler.
+        */
+       spin_lock(&kick_handlers_lock);
+
+       list_for_each(lh, &kick_handlers_list) {
+               kh = list_entry(lh, struct kick_irq_handler, list);
+
+               ret = kh->func(State, SigNum, Triggers, Inst, pTBI, &handled);
+               if (handled)
+                       break;
+       }
+
+       spin_unlock(&kick_handlers_lock);
+
+       WARN_ON(!handled);
+
+       return tail_end(ret);
+}
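
The header comment in kick.c above describes chained handlers that each report, via the final int pointer argument, whether the KICK was theirs. A hedged sketch of how a client might register one follows; the struct layout (.list and .func members) and the callback signature are inferred from how kick_handler() uses them, and the two my_device_*() helpers are placeholders.

    #include <linux/init.h>
    #include <linux/list.h>
    #include <asm/traps.h>

    static int my_device_has_pending_work(void) { return 0; }   /* placeholder */
    static void do_my_device_work(void) { }                      /* placeholder */

    static TBIRES my_kick_handler(TBIRES state, int signum, int triggers,
                                  int inst, PTBI ptbi, int *handled)
    {
            /* Only claim the KICK if it was really meant for us, so the
             * remaining handlers in the chain still get a chance to run. */
            if (!my_device_has_pending_work()) {
                    *handled = 0;
                    return state;
            }

            do_my_device_work();
            *handled = 1;
            return state;
    }

    static struct kick_irq_handler my_kick = {
            .func = my_kick_handler,
    };

    static int __init my_driver_init(void)
    {
            kick_register_func(&my_kick);   /* append ourselves to the chain */
            return 0;
    }
    device_initcall(my_driver_init);
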
diff --git a/arch/metag/kernel/machines.c b/arch/metag/kernel/machines.c
new file mode 100644 (file)
index 0000000..1edf6ba
--- /dev/null
@@ -0,0 +1,20 @@
+/*
+ *  arch/metag/kernel/machines.c
+ *
+ *  Copyright (C) 2012 Imagination Technologies Ltd.
+ *
+ *  Generic Meta Boards.
+ */
+
+#include <linux/init.h>
+#include <asm/irq.h>
+#include <asm/mach/arch.h>
+
+static const char *meta_boards_compat[] __initdata = {
+       "img,meta",
+       NULL,
+};
+
+MACHINE_START(META, "Generic Meta")
+       .dt_compat      = meta_boards_compat,
+MACHINE_END
diff --git a/arch/metag/kernel/metag_ksyms.c b/arch/metag/kernel/metag_ksyms.c
new file mode 100644 (file)
index 0000000..ec872ef
--- /dev/null
@@ -0,0 +1,49 @@
+#include <linux/export.h>
+
+#include <asm/div64.h>
+#include <asm/ftrace.h>
+#include <asm/page.h>
+#include <asm/string.h>
+#include <asm/tbx.h>
+
+EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(copy_page);
+
+#ifdef CONFIG_FLATMEM
+/* needed for the pfn_valid macro */
+EXPORT_SYMBOL(max_pfn);
+EXPORT_SYMBOL(min_low_pfn);
+#endif
+
+/* TBI symbols */
+EXPORT_SYMBOL(__TBI);
+EXPORT_SYMBOL(__TBIFindSeg);
+EXPORT_SYMBOL(__TBIPoll);
+EXPORT_SYMBOL(__TBITimeStamp);
+
+#define DECLARE_EXPORT(name) extern void name(void); EXPORT_SYMBOL(name)
+
+/* libgcc functions */
+DECLARE_EXPORT(__ashldi3);
+DECLARE_EXPORT(__ashrdi3);
+DECLARE_EXPORT(__lshrdi3);
+DECLARE_EXPORT(__udivsi3);
+DECLARE_EXPORT(__divsi3);
+DECLARE_EXPORT(__umodsi3);
+DECLARE_EXPORT(__modsi3);
+DECLARE_EXPORT(__muldi3);
+DECLARE_EXPORT(__cmpdi2);
+DECLARE_EXPORT(__ucmpdi2);
+
+/* Maths functions */
+EXPORT_SYMBOL(div_u64);
+EXPORT_SYMBOL(div_s64);
+
+/* String functions */
+EXPORT_SYMBOL(memcpy);
+EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(memmove);
+
+#ifdef CONFIG_FUNCTION_TRACER
+EXPORT_SYMBOL(mcount_wrapper);
+#endif
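
The DECLARE_EXPORT() macro above exists because these libgcc helpers are implemented in assembly and have no C header to pull a prototype from; the macro supplies a dummy prototype and then exports the symbol. Written out by hand, a single entry expands to roughly:

    /* DECLARE_EXPORT(__ashldi3) is equivalent to: */
    extern void __ashldi3(void);
    EXPORT_SYMBOL(__ashldi3);
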
diff --git a/arch/metag/kernel/module.c b/arch/metag/kernel/module.c
new file mode 100644 (file)
index 0000000..986331c
--- /dev/null
@@ -0,0 +1,284 @@
+/*  Kernel module help for Meta.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+*/
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/sort.h>
+
+#include <asm/unaligned.h>
+
+/* Count how many different relocations (different symbol, different
+   addend) there are. */
+static unsigned int count_relocs(const Elf32_Rela *rela, unsigned int num)
+{
+       unsigned int i, r_info, r_addend, _count_relocs;
+
+       _count_relocs = 0;
+       r_info = 0;
+       r_addend = 0;
+       for (i = 0; i < num; i++)
+               /* Only count relbranch relocs, others don't need stubs */
+               if (ELF32_R_TYPE(rela[i].r_info) == R_METAG_RELBRANCH &&
+                   (r_info != ELF32_R_SYM(rela[i].r_info) ||
+                    r_addend != rela[i].r_addend)) {
+                       _count_relocs++;
+                       r_info = ELF32_R_SYM(rela[i].r_info);
+                       r_addend = rela[i].r_addend;
+               }
+
+       return _count_relocs;
+}
+
+static int relacmp(const void *_x, const void *_y)
+{
+       const Elf32_Rela *x, *y;
+
+       y = (Elf32_Rela *)_x;
+       x = (Elf32_Rela *)_y;
+
+       /* Compare the entire r_info (as opposed to ELF32_R_SYM(r_info) only) to
+        * make the comparison cheaper/faster. It won't affect the sorting or
+        * the counting algorithms' performance
+        */
+       if (x->r_info < y->r_info)
+               return -1;
+       else if (x->r_info > y->r_info)
+               return 1;
+       else if (x->r_addend < y->r_addend)
+               return -1;
+       else if (x->r_addend > y->r_addend)
+               return 1;
+       else
+               return 0;
+}
+
+static void relaswap(void *_x, void *_y, int size)
+{
+       uint32_t *x, *y, tmp;
+       int i;
+
+       y = (uint32_t *)_x;
+       x = (uint32_t *)_y;
+
+       for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) {
+               tmp = x[i];
+               x[i] = y[i];
+               y[i] = tmp;
+       }
+}
+
+/* Get the potential trampoline size required for the init and
+   non-init sections. */
+static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
+                                 const Elf32_Shdr *sechdrs,
+                                 const char *secstrings,
+                                 int is_init)
+{
+       unsigned long ret = 0;
+       unsigned i;
+
+       /* Everything marked ALLOC (this includes the exported
+          symbols) */
+       for (i = 1; i < hdr->e_shnum; i++) {
+               /* If it's called *.init*, and we're not init, we're
+                  not interested */
+               if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != NULL)
+                   != is_init)
+                       continue;
+
+               /* We don't want to look at debug sections. */
+               if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != NULL)
+                       continue;
+
+               if (sechdrs[i].sh_type == SHT_RELA) {
+                       pr_debug("Found relocations in section %u\n", i);
+                       pr_debug("Ptr: %p.  Number: %u\n",
+                                (void *)hdr + sechdrs[i].sh_offset,
+                                sechdrs[i].sh_size / sizeof(Elf32_Rela));
+
+                       /* Sort the relocation information based on a symbol and
+                        * addend key. This is a stable O(n*log n) complexity
+                        * algorithm, but it reduces the complexity of
+                        * count_relocs() to linear complexity O(n).
+                        */
+                       sort((void *)hdr + sechdrs[i].sh_offset,
+                            sechdrs[i].sh_size / sizeof(Elf32_Rela),
+                            sizeof(Elf32_Rela), relacmp, relaswap);
+
+                       ret += count_relocs((void *)hdr
+                                            + sechdrs[i].sh_offset,
+                                            sechdrs[i].sh_size
+                                            / sizeof(Elf32_Rela))
+                               * sizeof(struct metag_plt_entry);
+               }
+       }
+
+       return ret;
+}
+
+int module_frob_arch_sections(Elf32_Ehdr *hdr,
+                             Elf32_Shdr *sechdrs,
+                             char *secstrings,
+                             struct module *me)
+{
+       unsigned int i;
+
+       /* Find .plt and .init.plt sections */
+       for (i = 0; i < hdr->e_shnum; i++) {
+               if (strcmp(secstrings + sechdrs[i].sh_name, ".init.plt") == 0)
+                       me->arch.init_plt_section = i;
+               else if (strcmp(secstrings + sechdrs[i].sh_name, ".plt") == 0)
+                       me->arch.core_plt_section = i;
+       }
+       if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
+               pr_err("Module doesn't contain .plt or .init.plt sections.\n");
+               return -ENOEXEC;
+       }
+
+       /* Override their sizes */
+       sechdrs[me->arch.core_plt_section].sh_size
+               = get_plt_size(hdr, sechdrs, secstrings, 0);
+       sechdrs[me->arch.core_plt_section].sh_type = SHT_NOBITS;
+       sechdrs[me->arch.init_plt_section].sh_size
+               = get_plt_size(hdr, sechdrs, secstrings, 1);
+       sechdrs[me->arch.init_plt_section].sh_type = SHT_NOBITS;
+       return 0;
+}
+
+/* Set up a trampoline in the PLT to bounce us to the distant function */
+static uint32_t do_plt_call(void *location, Elf32_Addr val,
+                           Elf32_Shdr *sechdrs, struct module *mod)
+{
+       struct metag_plt_entry *entry;
+       /* Instructions used to do the indirect jump.  */
+       uint32_t tramp[2];
+
+       /* We have to trash a register, so we assume that any control
+          transfer more than 21 bits away must be a function call
+          (so we can use a call-clobbered register).  */
+
+       /* MOVT D0Re0,#HI(v) */
+       tramp[0] = 0x02000005 | (((val & 0xffff0000) >> 16) << 3);
+       /* JUMP D0Re0,#LO(v) */
+       tramp[1] = 0xac000001 | ((val & 0x0000ffff) << 3);
+
+       /* Init, or core PLT? */
+       if (location >= mod->module_core
+           && location < mod->module_core + mod->core_size)
+               entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
+       else
+               entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
+
+       /* Find this entry, or if that fails, the next avail. entry */
+       while (entry->tramp[0])
+               if (entry->tramp[0] == tramp[0] && entry->tramp[1] == tramp[1])
+                       return (uint32_t)entry;
+               else
+                       entry++;
+
+       entry->tramp[0] = tramp[0];
+       entry->tramp[1] = tramp[1];
+
+       return (uint32_t)entry;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+                  const char *strtab,
+                  unsigned int symindex,
+                  unsigned int relsec,
+                  struct module *me)
+{
+       unsigned int i;
+       Elf32_Rela *rel = (void *)sechdrs[relsec].sh_addr;
+       Elf32_Sym *sym;
+       Elf32_Addr relocation;
+       uint32_t *location;
+       int32_t value;
+
+       pr_debug("Applying relocate section %u to %u\n", relsec,
+                sechdrs[relsec].sh_info);
+       for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+               /* This is where to make the change */
+               location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
+                       + rel[i].r_offset;
+               /* This is the symbol it is referring to.  Note that all
+                  undefined symbols have been resolved.  */
+               sym = (Elf32_Sym *)sechdrs[symindex].sh_addr
+                       + ELF32_R_SYM(rel[i].r_info);
+               relocation = sym->st_value + rel[i].r_addend;
+
+               switch (ELF32_R_TYPE(rel[i].r_info)) {
+               case R_METAG_NONE:
+                       break;
+               case R_METAG_HIADDR16:
+                       relocation >>= 16;
+                       /* fall through to pack the low 16 bits */
+               case R_METAG_LOADDR16:
+                       *location = (*location & 0xfff80007) |
+                               ((relocation & 0xffff) << 3);
+                       break;
+               case R_METAG_ADDR32:
+                       /*
+                        * Packed data structures may cause a misaligned
+                        * R_METAG_ADDR32 to be emitted.
+                        */
+                       put_unaligned(relocation, location);
+                       break;
+               case R_METAG_GETSETOFF:
+                       *location += ((relocation & 0xfff) << 7);
+                       break;
+               case R_METAG_RELBRANCH:
+                       if (*location & (0x7ffff << 5)) {
+                               pr_err("bad relbranch relocation\n");
+                               break;
+                       }
+
+                       /* This jump is too big for the offset slot. Build
+                        * a PLT to jump through to get to where we want to go.
+                        * NB: 21-bit check - not scaled to 19-bit yet
+                        */
+                       if (((int32_t)(relocation -
+                                      (uint32_t)location) > 0xfffff) ||
+                           ((int32_t)(relocation -
+                                      (uint32_t)location) < -0xfffff)) {
+                               relocation = do_plt_call(location, relocation,
+                                                        sechdrs, me);
+                       }
+
+                       value = relocation - (uint32_t)location;
+
+                       /* branch instruction aligned */
+                       value /= 4;
+
+                       if ((value > 0x7ffff) || (value < -0x7ffff)) {
+                               /*
+                                * this should have been caught by the code
+                                * above!
+                                */
+                               pr_err("overflow of relbranch reloc\n");
+                       }
+
+                       *location = (*location & (~(0x7ffff << 5))) |
+                               ((value & 0x7ffff) << 5);
+                       break;
+
+               default:
+                       pr_err("module %s: Unknown relocation: %u\n",
+                              me->name, ELF32_R_TYPE(rel[i].r_info));
+                       return -ENOEXEC;
+               }
+       }
+       return 0;
+}
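
The R_METAG_RELBRANCH case above makes the PLT decision on the raw byte offset (the "21-bit check" note) and then encodes the offset in 4-byte instruction units into bits [23:5] of the branch word. A small standalone sketch of both steps, using the constants from the code above and a placeholder instruction word:

    #include <stdint.h>
    #include <stdio.h>

    /* Does a direct branch from "location" to "target" need to go via a
     * PLT trampoline? Mirrors the pre-scaling range check above. */
    static int needs_plt(uint32_t location, uint32_t target)
    {
            int32_t byte_off = (int32_t)(target - location);

            return byte_off > 0xfffff || byte_off < -0xfffff;
    }

    /* Pack the word-scaled offset into bits [23:5] of the instruction,
     * as the relocation code above does. */
    static uint32_t encode_relbranch(uint32_t insn, uint32_t location,
                                     uint32_t target)
    {
            int32_t value = (int32_t)(target - location) / 4;

            return (insn & ~(0x7ffff << 5)) | ((value & 0x7ffff) << 5);
    }

    int main(void)
    {
            printf("+64KiB branch needs PLT? %d\n",
                   needs_plt(0x40000000, 0x40010000));
            printf("+8MiB branch needs PLT?  %d\n",
                   needs_plt(0x40000000, 0x40800000));
            /* 0xa0000000 is a placeholder opcode, not a real encoding. */
            printf("encoded word: 0x%08x\n",
                   (unsigned int)encode_relbranch(0xa0000000, 0x40000000,
                                                  0x40010000));
            return 0;
    }
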
diff --git a/arch/metag/kernel/perf/Makefile b/arch/metag/kernel/perf/Makefile
new file mode 100644 (file)
index 0000000..b158cb2
--- /dev/null
@@ -0,0 +1,3 @@
+# Makefile for performance event core
+
+obj-y += perf_event.o
diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c
new file mode 100644 (file)
index 0000000..a876d5f
--- /dev/null
@@ -0,0 +1,861 @@
+/*
+ * Meta performance counter support.
+ *  Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This code is based on the sh pmu code:
+ *  Copyright (C) 2009 Paul Mundt
+ *
+ * and on the arm pmu code:
+ *  Copyright (C) 2009 picoChip Designs, Ltd., James Iles
+ *  Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/irqchip/metag.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#include <asm/core_reg.h>
+#include <asm/hwthread.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+
+#include "perf_event.h"
+
+static int _hw_perf_event_init(struct perf_event *);
+static void _hw_perf_event_destroy(struct perf_event *);
+
+/* Determines which core type we are */
+static struct metag_pmu *metag_pmu __read_mostly;
+
+/* Processor specific data */
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+
+/* PMU admin */
+const char *perf_pmu_name(void)
+{
+       if (metag_pmu)
+               return metag_pmu->pmu.name;
+
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_pmu_name);
+
+int perf_num_counters(void)
+{
+       if (metag_pmu)
+               return metag_pmu->max_events;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(perf_num_counters);
+
+static inline int metag_pmu_initialised(void)
+{
+       return !!metag_pmu;
+}
+
+static void release_pmu_hardware(void)
+{
+       int irq;
+       unsigned int version = (metag_pmu->version &
+                       (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+                       METAC_ID_REV_S;
+
+       /* Early cores don't have overflow interrupts */
+       if (version < 0x0104)
+               return;
+
+       irq = internal_irq_map(17);
+       if (irq >= 0)
+               free_irq(irq, (void *)1);
+
+       irq = internal_irq_map(16);
+       if (irq >= 0)
+               free_irq(irq, (void *)0);
+}
+
+static int reserve_pmu_hardware(void)
+{
+       int err = 0, irq[2];
+       unsigned int version = (metag_pmu->version &
+                       (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS)) >>
+                       METAC_ID_REV_S;
+
+       /* Early cores don't have overflow interrupts */
+       if (version < 0x0104)
+               goto out;
+
+       /*
+        * Bit 16 on HWSTATMETA is the interrupt for performance counter 0;
+        * similarly, 17 is the interrupt for performance counter 1.
+        * We can't (yet) interrupt on the cycle counter because it's a core
+        * register; it does, however, hold a 32-bit value rather than 24 bits.
+        */
+       irq[0] = internal_irq_map(16);
+       if (irq[0] < 0) {
+               pr_err("unable to map internal IRQ %d\n", 16);
+               goto out;
+       }
+       err = request_irq(irq[0], metag_pmu->handle_irq, IRQF_NOBALANCING,
+                       "metagpmu0", (void *)0);
+       if (err) {
+               pr_err("unable to request IRQ%d for metag PMU counters\n",
+                               irq[0]);
+               goto out;
+       }
+
+       irq[1] = internal_irq_map(17);
+       if (irq[1] < 0) {
+               pr_err("unable to map internal IRQ %d\n", 17);
+               goto out_irq1;
+       }
+       err = request_irq(irq[1], metag_pmu->handle_irq, IRQF_NOBALANCING,
+                       "metagpmu1", (void *)1);
+       if (err) {
+               pr_err("unable to request IRQ%d for metag PMU counters\n",
+                               irq[1]);
+               goto out_irq1;
+       }
+
+       return 0;
+
+out_irq1:
+       free_irq(irq[0], (void *)0);
+out:
+       return err;
+}
+
+/* PMU operations */
+static void metag_pmu_enable(struct pmu *pmu)
+{
+}
+
+static void metag_pmu_disable(struct pmu *pmu)
+{
+}
+
+static int metag_pmu_event_init(struct perf_event *event)
+{
+       int err = 0;
+       atomic_t *active_events = &metag_pmu->active_events;
+
+       if (!metag_pmu_initialised()) {
+               err = -ENODEV;
+               goto out;
+       }
+
+       if (has_branch_stack(event))
+               return -EOPNOTSUPP;
+
+       event->destroy = _hw_perf_event_destroy;
+
+       if (!atomic_inc_not_zero(active_events)) {
+               mutex_lock(&metag_pmu->reserve_mutex);
+               if (atomic_read(active_events) == 0)
+                       err = reserve_pmu_hardware();
+
+               if (!err)
+                       atomic_inc(active_events);
+
+               mutex_unlock(&metag_pmu->reserve_mutex);
+       }
+
+       /* Hardware and caches counters */
+       switch (event->attr.type) {
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               err = _hw_perf_event_init(event);
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
+       if (err)
+               event->destroy(event);
+
+out:
+       return err;
+}
+
+void metag_pmu_event_update(struct perf_event *event,
+               struct hw_perf_event *hwc, int idx)
+{
+       u64 prev_raw_count, new_raw_count;
+       s64 delta;
+
+       /*
+        * If this counter is chained, it may be that the previous counter
+        * value has been changed beneath us.
+        *
+        * To get around this, we read and exchange the new raw count, then
+        * add the delta (new - prev) to the generic counter atomically.
+        *
+        * Without interrupts, this is the simplest approach.
+        */
+again:
+       prev_raw_count = local64_read(&hwc->prev_count);
+       new_raw_count = metag_pmu->read(idx);
+
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+                       new_raw_count) != prev_raw_count)
+               goto again;
+
+       /*
+        * Calculate the delta and add it to the counter.
+        */
+       delta = new_raw_count - prev_raw_count;
+
+       local64_add(delta, &event->count);
+}
+
+int metag_pmu_event_set_period(struct perf_event *event,
+               struct hw_perf_event *hwc, int idx)
+{
+       s64 left = local64_read(&hwc->period_left);
+       s64 period = hwc->sample_period;
+       int ret = 0;
+
+       if (unlikely(left <= -period)) {
+               left = period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+
+       if (unlikely(left <= 0)) {
+               left += period;
+               local64_set(&hwc->period_left, left);
+               hwc->last_period = period;
+               ret = 1;
+       }
+
+       if (left > (s64)metag_pmu->max_period)
+               left = metag_pmu->max_period;
+
+       if (metag_pmu->write)
+               metag_pmu->write(idx, (u64)(-left) & MAX_PERIOD);
+
+       perf_event_update_userpage(event);
+
+       return ret;
+}
+
+static void metag_pmu_start(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       if (WARN_ON_ONCE(idx == -1))
+               return;
+
+       /*
+        * We always have to reprogram the period, so ignore PERF_EF_RELOAD.
+        */
+       if (flags & PERF_EF_RELOAD)
+               WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+       hwc->state = 0;
+
+       /*
+        * Reset the period.
+        * Some counters can't be stopped (i.e. are core global), so when the
+        * counter was 'stopped' we merely disabled the IRQ. If we don't reset
+        * the period, then we'll either: a) get an overflow too soon;
+        * or b) get one too late if the overflow happened since disabling.
+        * Obviously, this has little bearing on cores without the overflow
+        * interrupt, as the performance counter resets to zero on write
+        * anyway.
+        */
+       if (metag_pmu->max_period)
+               metag_pmu_event_set_period(event, hwc, hwc->idx);
+       cpuc->events[idx] = event;
+       metag_pmu->enable(hwc, idx);
+}
+
+static void metag_pmu_stop(struct perf_event *event, int flags)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       /*
+        * We should always update the counter on stop; see the comment
+        * above for why.
+        */
+       if (!(hwc->state & PERF_HES_STOPPED)) {
+               metag_pmu_event_update(event, hwc, hwc->idx);
+               metag_pmu->disable(hwc, hwc->idx);
+               hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       }
+}
+
+static int metag_pmu_add(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = 0, ret = 0;
+
+       perf_pmu_disable(event->pmu);
+
+       /* check whether we're counting instructions */
+       if (hwc->config == 0x100) {
+               if (__test_and_set_bit(METAG_INST_COUNTER,
+                               cpuc->used_mask)) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
+               idx = METAG_INST_COUNTER;
+       } else {
+               /* Check whether we have a spare counter */
+               idx = find_first_zero_bit(cpuc->used_mask,
+                               atomic_read(&metag_pmu->active_events));
+               if (idx >= METAG_INST_COUNTER) {
+                       ret = -EAGAIN;
+                       goto out;
+               }
+
+               __set_bit(idx, cpuc->used_mask);
+       }
+       hwc->idx = idx;
+
+       /* Make sure the counter is disabled */
+       metag_pmu->disable(hwc, idx);
+
+       hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+       if (flags & PERF_EF_START)
+               metag_pmu_start(event, PERF_EF_RELOAD);
+
+       perf_event_update_userpage(event);
+out:
+       perf_pmu_enable(event->pmu);
+       return ret;
+}
+
+static void metag_pmu_del(struct perf_event *event, int flags)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct hw_perf_event *hwc = &event->hw;
+       int idx = hwc->idx;
+
+       WARN_ON(idx < 0);
+       metag_pmu_stop(event, PERF_EF_UPDATE);
+       cpuc->events[idx] = NULL;
+       __clear_bit(idx, cpuc->used_mask);
+
+       perf_event_update_userpage(event);
+}
+
+static void metag_pmu_read(struct perf_event *event)
+{
+       struct hw_perf_event *hwc = &event->hw;
+
+       /* Don't read disabled counters! */
+       if (hwc->idx < 0)
+               return;
+
+       metag_pmu_event_update(event, hwc, hwc->idx);
+}
+
+static struct pmu pmu = {
+       .pmu_enable     = metag_pmu_enable,
+       .pmu_disable    = metag_pmu_disable,
+
+       .event_init     = metag_pmu_event_init,
+
+       .add            = metag_pmu_add,
+       .del            = metag_pmu_del,
+       .start          = metag_pmu_start,
+       .stop           = metag_pmu_stop,
+       .read           = metag_pmu_read,
+};
+
+/* Core counter specific functions */
+static const int metag_general_events[] = {
+       [PERF_COUNT_HW_CPU_CYCLES] = 0x03,
+       [PERF_COUNT_HW_INSTRUCTIONS] = 0x100,
+       [PERF_COUNT_HW_CACHE_REFERENCES] = -1,
+       [PERF_COUNT_HW_CACHE_MISSES] = -1,
+       [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = -1,
+       [PERF_COUNT_HW_BRANCH_MISSES] = -1,
+       [PERF_COUNT_HW_BUS_CYCLES] = -1,
+       [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = -1,
+       [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = -1,
+       [PERF_COUNT_HW_REF_CPU_CYCLES] = -1,
+};
+
+static const int metag_pmu_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+       [C(L1D)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = 0x08,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(L1I)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = 0x09,
+                       [C(RESULT_MISS)] = 0x0a,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(LL)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(DTLB)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = 0xd0,
+                       [C(RESULT_MISS)] = 0xd2,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = 0xd4,
+                       [C(RESULT_MISS)] = 0xd5,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(ITLB)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = 0xd1,
+                       [C(RESULT_MISS)] = 0xd3,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(BPU)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+       [C(NODE)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
+                       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
+               },
+       },
+};
+
+
+static void _hw_perf_event_destroy(struct perf_event *event)
+{
+       atomic_t *active_events = &metag_pmu->active_events;
+       struct mutex *pmu_mutex = &metag_pmu->reserve_mutex;
+
+       if (atomic_dec_and_mutex_lock(active_events, pmu_mutex)) {
+               release_pmu_hardware();
+               mutex_unlock(pmu_mutex);
+       }
+}
+
+static int _hw_perf_cache_event(int config, int *evp)
+{
+       unsigned long type, op, result;
+       int ev;
+
+       if (!metag_pmu->cache_events)
+               return -EINVAL;
+
+       /* Unpack config */
+       type = config & 0xff;
+       op = (config >> 8) & 0xff;
+       result = (config >> 16) & 0xff;
+
+       if (type >= PERF_COUNT_HW_CACHE_MAX ||
+                       op >= PERF_COUNT_HW_CACHE_OP_MAX ||
+                       result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+               return -EINVAL;
+
+       ev = (*metag_pmu->cache_events)[type][op][result];
+       if (ev == 0)
+               return -EOPNOTSUPP;
+       if (ev == -1)
+               return -EINVAL;
+       *evp = ev;
+       return 0;
+}
+
+static int _hw_perf_event_init(struct perf_event *event)
+{
+       struct perf_event_attr *attr = &event->attr;
+       struct hw_perf_event *hwc = &event->hw;
+       int mapping = 0, err;
+
+       switch (attr->type) {
+       case PERF_TYPE_HARDWARE:
+               if (attr->config >= PERF_COUNT_HW_MAX)
+                       return -EINVAL;
+
+               mapping = metag_pmu->event_map(attr->config);
+               break;
+
+       case PERF_TYPE_HW_CACHE:
+               err = _hw_perf_cache_event(attr->config, &mapping);
+               if (err)
+                       return err;
+               break;
+       }
+
+       /* Return early if the event is unsupported */
+       if (mapping == -1)
+               return -EINVAL;
+
+       /*
+        * Early cores have "limited" counters - they have no overflow
+        * interrupts - and so are unable to do sampling without extra work
+        * and timer assistance.
+        */
+       if (metag_pmu->max_period == 0) {
+               if (hwc->sample_period)
+                       return -EINVAL;
+       }
+
+       /*
+        * Don't assign an index until the event is placed into the hardware.
+        * -1 signifies that we're still deciding where to put it. On SMP
+        * systems each core has its own set of counters, so we can't do any
+        * constraint checking yet.
+        */
+       hwc->idx = -1;
+
+       /* Store the event encoding */
+       hwc->config |= (unsigned long)mapping;
+
+       /*
+        * For non-sampling runs, limit the sample_period to half of the
+        * counter width. This way, the new counter value should be less
+        * likely to overtake the previous one (unless there are IRQ latency
+        * issues...)
+        */
+       if (metag_pmu->max_period) {
+               if (!hwc->sample_period) {
+                       hwc->sample_period = metag_pmu->max_period >> 1;
+                       hwc->last_period = hwc->sample_period;
+                       local64_set(&hwc->period_left, hwc->sample_period);
+               }
+       }
+
+       return 0;
+}
+
+static void metag_pmu_enable_counter(struct hw_perf_event *event, int idx)
+{
+       struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+       unsigned int config = event->config;
+       unsigned int tmp = config & 0xf0;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+       /*
+        * Check if we're enabling the instruction counter (index of
+        * MAX_HWEVENTS - 1)
+        */
+       if (METAG_INST_COUNTER == idx) {
+               WARN_ONCE((config != 0x100),
+                       "invalid configuration (%d) for counter (%d)\n",
+                       config, idx);
+
+               /* Reset the cycle count */
+               __core_reg_set(TXTACTCYC, 0);
+               goto unlock;
+       }
+
+       /* Check for a core internal or performance channel event. */
+       if (tmp) {
+               void *perf_addr = (void *)PERF_COUNT(idx);
+
+               /*
+                * Anything other than a cycle count will write the low
+                * nibble to the correct counter register.
+                */
+               switch (tmp) {
+               case 0xd0:
+                       perf_addr = (void *)PERF_ICORE(idx);
+                       break;
+
+               case 0xf0:
+                       perf_addr = (void *)PERF_CHAN(idx);
+                       break;
+               }
+
+               metag_out32((tmp & 0x0f), perf_addr);
+
+               /*
+                * Now we use the high nibble as the performance event
+                * to count.
+                */
+               config = tmp >> 4;
+       }
+
+       /*
+        * Enabled counters start from 0. Early cores clear the count on
+        * write but newer cores don't, so we make sure that the count is
+        * set to 0.
+        */
+       tmp = ((config & 0xf) << 28) |
+                       ((1 << 24) << cpu_2_hwthread_id[get_cpu()]);
+       metag_out32(tmp, PERF_COUNT(idx));
+unlock:
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void metag_pmu_disable_counter(struct hw_perf_event *event, int idx)
+{
+       struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+       unsigned int tmp = 0;
+       unsigned long flags;
+
+       /*
+        * The cycle counter can't be disabled per se, as it's a hardware
+        * thread register which is always counting. We merely return if this
+        * is the counter we're attempting to disable.
+        */
+       if (METAG_INST_COUNTER == idx)
+               return;
+
+       /*
+        * The counter value _should_ have been read prior to disabling,
+        * as if we're running on an early core then the value gets reset to
+        * 0, and any read after that would be useless. On the newer cores,
+        * however, it's better to read-modify-update this for purposes of
+        * the overflow interrupt.
+        * Here we remove the thread id AND the event nibble (there are at
+        * least two events that are core global and ignore the thread id
+        * mask). This only works because we don't mix thread performance
+        * counts, and event 0x00 requires a thread id mask!
+        */
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+       tmp = metag_in32(PERF_COUNT(idx));
+       tmp &= 0x00ffffff;
+       metag_out32(tmp, PERF_COUNT(idx));
+
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static u64 metag_pmu_read_counter(int idx)
+{
+       u32 tmp = 0;
+
+       /* The act of reading the cycle counter also clears it */
+       if (METAG_INST_COUNTER == idx) {
+               __core_reg_swap(TXTACTCYC, tmp);
+               goto out;
+       }
+
+       tmp = metag_in32(PERF_COUNT(idx)) & 0x00ffffff;
+out:
+       return tmp;
+}
+
+static void metag_pmu_write_counter(int idx, u32 val)
+{
+       struct cpu_hw_events *events = &__get_cpu_var(cpu_hw_events);
+       u32 tmp = 0;
+       unsigned long flags;
+
+       /*
+        * This _shouldn't_ happen, but if it does, then we can just
+        * ignore the write, as the register is read-only and clear-on-write.
+        */
+       if (METAG_INST_COUNTER == idx)
+               return;
+
+       /*
+        * We'll keep the thread mask and event id, and just update the
+        * counter itself. Also, we should bound the value to 24 bits.
+        */
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+       val &= 0x00ffffff;
+       tmp = metag_in32(PERF_COUNT(idx)) & 0xff000000;
+       val |= tmp;
+       metag_out32(val, PERF_COUNT(idx));
+
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static int metag_pmu_event_map(int idx)
+{
+       return metag_general_events[idx];
+}
+
+static irqreturn_t metag_pmu_counter_overflow(int irq, void *dev)
+{
+       int idx = (int)dev;
+       struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+       struct perf_event *event = cpuhw->events[idx];
+       struct hw_perf_event *hwc = &event->hw;
+       struct pt_regs *regs = get_irq_regs();
+       struct perf_sample_data sampledata;
+       unsigned long flags;
+       u32 counter = 0;
+
+       /*
+        * We need to temporarily stop the core from generating another
+        * interrupt while we disable this counter. However, we don't want
+        * to flag the counter as free.
+        */
+       __global_lock2(flags);
+       counter = metag_in32(PERF_COUNT(idx));
+       metag_out32((counter & 0x00ffffff), PERF_COUNT(idx));
+       __global_unlock2(flags);
+
+       /* Update the counts and reset the sample period */
+       metag_pmu_event_update(event, hwc, idx);
+       perf_sample_data_init(&sampledata, 0, hwc->last_period);
+       metag_pmu_event_set_period(event, hwc, idx);
+
+       /*
+        * Enable the counter again once core overflow processing has
+        * completed.
+        */
+       if (!perf_event_overflow(event, &sampledata, regs))
+               metag_out32(counter, PERF_COUNT(idx));
+
+       return IRQ_HANDLED;
+}
+
+static struct metag_pmu _metag_pmu = {
+       .handle_irq     = metag_pmu_counter_overflow,
+       .enable         = metag_pmu_enable_counter,
+       .disable        = metag_pmu_disable_counter,
+       .read           = metag_pmu_read_counter,
+       .write          = metag_pmu_write_counter,
+       .event_map      = metag_pmu_event_map,
+       .cache_events   = &metag_pmu_cache_events,
+       .max_period     = MAX_PERIOD,
+       .max_events     = MAX_HWEVENTS,
+};
+
+/* PMU CPU hotplug notifier */
+static int __cpuinit metag_pmu_cpu_notify(struct notifier_block *b,
+               unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (unsigned int)hcpu;
+       struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+       if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
+               return NOTIFY_DONE;
+
+       memset(cpuc, 0, sizeof(struct cpu_hw_events));
+       raw_spin_lock_init(&cpuc->pmu_lock);
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata metag_pmu_notifier = {
+       .notifier_call = metag_pmu_cpu_notify,
+};
+
+/* PMU Initialisation */
+static int __init init_hw_perf_events(void)
+{
+       int ret = 0, cpu;
+       u32 version = *(u32 *)METAC_ID;
+       int major = (version & METAC_ID_MAJOR_BITS) >> METAC_ID_MAJOR_S;
+       int min_rev = (version & (METAC_ID_MINOR_BITS | METAC_ID_REV_BITS))
+                       >> METAC_ID_REV_S;
+
+       /* If it's not a Meta 2 core then it's not supported */
+       if (0x02 > major) {
+               pr_info("no hardware counter support available\n");
+               goto out;
+       } else if (0x02 == major) {
+               metag_pmu = &_metag_pmu;
+
+               if (min_rev < 0x0104) {
+                       /*
+                        * A core without overflow interrupts, and clear-on-
+                        * write counters.
+                        */
+                       metag_pmu->handle_irq = NULL;
+                       metag_pmu->write = NULL;
+                       metag_pmu->max_period = 0;
+               }
+
+               metag_pmu->name = "Meta 2";
+               metag_pmu->version = version;
+               metag_pmu->pmu = pmu;
+       }
+
+       pr_info("enabled with %s PMU driver, %d counters available\n",
+                       metag_pmu->name, metag_pmu->max_events);
+
+       /* Initialise the active events and reservation mutex */
+       atomic_set(&metag_pmu->active_events, 0);
+       mutex_init(&metag_pmu->reserve_mutex);
+
+       /* Clear the counters */
+       metag_out32(0, PERF_COUNT(0));
+       metag_out32(0, PERF_COUNT(1));
+
+       for_each_possible_cpu(cpu) {
+               struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
+
+               memset(cpuc, 0, sizeof(struct cpu_hw_events));
+               raw_spin_lock_init(&cpuc->pmu_lock);
+       }
+
+       register_cpu_notifier(&metag_pmu_notifier);
+       ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW);
+out:
+       return ret;
+}
+early_initcall(init_hw_perf_events);
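
metag_pmu_event_set_period() above arms the 24-bit counter by writing (u64)(-left) & MAX_PERIOD, so the hardware wraps (and, on newer cores, interrupts) after exactly "left" further events; _hw_perf_event_init() defaults the period to half the counter width for plain counting runs. A standalone sketch of that arithmetic, using the MAX_PERIOD definition from perf_event.h below:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_PERIOD ((1UL << 24) - 1)    /* 24-bit counters */

    /* Start value loaded into the counter so it wraps after "left" events,
     * mirroring metag_pmu_event_set_period() above. */
    static uint32_t period_start_value(int64_t left)
    {
            return (uint64_t)(-left) & MAX_PERIOD;
    }

    int main(void)
    {
            /* Starting at 0xffff00 leaves 256 events until wrap-around. */
            printf("start=0x%06x\n", (unsigned int)period_start_value(256));
            /* Default for counting runs: half the counter width. */
            printf("start=0x%06x\n",
                   (unsigned int)period_start_value(MAX_PERIOD >> 1));
            return 0;
    }
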
diff --git a/arch/metag/kernel/perf/perf_event.h b/arch/metag/kernel/perf/perf_event.h
new file mode 100644 (file)
index 0000000..fd10a13
--- /dev/null
@@ -0,0 +1,106 @@
+/*
+ * Meta performance counter support.
+ *  Copyright (C) 2012 Imagination Technologies Ltd
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#ifndef METAG_PERF_EVENT_H_
+#define METAG_PERF_EVENT_H_
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+
+/* For performance counter definitions */
+#include <asm/metag_mem.h>
+
+/*
+ * The Meta core has two performance counters, with 24-bit resolution. Newer
+ * cores generate an overflow interrupt on transition from 0xffffff to 0.
+ *
+ * Each counter consists of the counter id, hardware thread id, and the count
+ * itself; each counter can be assigned to multiple hardware threads at any
+ * one time, with the returned count being an aggregate of events. A small
+ * number of events are thread global, i.e. they count the aggregate of all
+ * threads' events, regardless of the thread selected.
+ *
+ * Newer cores can store an arbitrary 24-bit number in the counter, whereas
+ * older cores will clear the counter bits on write.
+ *
+ * We also have a pseudo-counter in the form of the thread active cycles
+ * counter (the TXTACTCYC core register).
+ */
+
+#define MAX_HWEVENTS           3
+#define MAX_PERIOD             ((1UL << 24) - 1)
+#define METAG_INST_COUNTER     (MAX_HWEVENTS - 1)
+
+/**
+ * struct cpu_hw_events - a processor core's performance events
+ * @events:    an array of perf_events active for a given index.
+ * @used_mask: a bitmap of in-use counters.
+ * @pmu_lock:  a lock serialising access to the counter hardware
+ *
+ * This is a per-cpu/core structure that maintains a record of its
+ * performance counters' state.
+ */
+struct cpu_hw_events {
+       struct perf_event       *events[MAX_HWEVENTS];
+       unsigned long           used_mask[BITS_TO_LONGS(MAX_HWEVENTS)];
+       raw_spinlock_t          pmu_lock;
+};
+
+/**
+ * struct metag_pmu - the Meta PMU structure
+ * @pmu:               core pmu structure
+ * @name:              pmu name
+ * @version:           core version
+ * @handle_irq:                overflow interrupt handler
+ * @enable:            enable a counter
+ * @disable:           disable a counter
+ * @read:              read the value of a counter
+ * @write:             write a value to a counter
+ * @event_map:         kernel event to counter event id map
+ * @cache_events:      kernel cache counter to core cache counter map
+ * @max_period:                maximum value of the counter before overflow
+ * @max_events:                maximum number of counters available at any one time
+ * @active_events:     number of active counters
+ * @reserve_mutex:     counter reservation mutex
+ *
+ * This describes the main functionality and data used by the performance
+ * event core.
+ */
+struct metag_pmu {
+       struct pmu      pmu;
+       const char      *name;
+       u32             version;
+       irqreturn_t     (*handle_irq)(int irq_num, void *dev);
+       void            (*enable)(struct hw_perf_event *evt, int idx);
+       void            (*disable)(struct hw_perf_event *evt, int idx);
+       u64             (*read)(int idx);
+       void            (*write)(int idx, u32 val);
+       int             (*event_map)(int idx);
+       const int       (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
+       u32             max_period;
+       int             max_events;
+       atomic_t        active_events;
+       struct mutex    reserve_mutex;
+};
+
+/* Convenience macros for accessing the perf counters */
+#define PERF_COUNT(x)  (PERF_COUNT0 + (sizeof(u64) * (x)))
+#define PERF_ICORE(x)  (PERF_ICORE0 + (sizeof(u64) * (x)))
+#define PERF_CHAN(x)   (PERF_CHAN0 + (sizeof(u64) * (x)))
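+/*
+ * Each accessor above strides by sizeof(u64) == 8 bytes, e.g. PERF_COUNT(1)
+ * expands to PERF_COUNT0 + 8, treating the counters as consecutive 64-bit
+ * memory-mapped registers.
+ */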
+
+/* Cache index macros */
+#define C(x) PERF_COUNT_HW_CACHE_##x
+#define CACHE_OP_UNSUPPORTED   0xfffe
+#define CACHE_OP_NONSENSE      0xffff
+
+#endif
diff --git a/arch/metag/kernel/perf_callchain.c b/arch/metag/kernel/perf_callchain.c
new file mode 100644 (file)
index 0000000..3156334
--- /dev/null
@@ -0,0 +1,96 @@
+/*
+ * Perf callchain handling code.
+ *
+ *   Based on the ARM perf implementation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+static bool is_valid_call(unsigned long calladdr)
+{
+       unsigned int callinsn;
+
+       /* Check the possible return address is aligned. */
+       if (!(calladdr & 0x3)) {
+               if (!get_user(callinsn, (unsigned int *)calladdr)) {
+                       /* Check for CALLR or SWAP PC,D1RtP. */
+                       if ((callinsn & 0xff000000) == 0xab000000 ||
+                           callinsn == 0xa3200aa0)
+                               return true;
+               }
+       }
+       return false;
+}
+
+static struct metag_frame __user *
+user_backtrace(struct metag_frame __user *user_frame,
+              struct perf_callchain_entry *entry)
+{
+       struct metag_frame frame;
+       unsigned long calladdr;
+
+       /* We cannot rely on having frame pointers in user code. */
+       while (1) {
+               /* Also check accessibility of one struct frame beyond */
+               if (!access_ok(VERIFY_READ, user_frame, sizeof(frame)))
+                       return 0;
+               if (__copy_from_user_inatomic(&frame, user_frame,
+                                             sizeof(frame)))
+                       return 0;
+
+               --user_frame;
+
+               calladdr = frame.lr - 4;
+               if (is_valid_call(calladdr)) {
+                       perf_callchain_store(entry, calladdr);
+                       return user_frame;
+               }
+       }
+
+       return 0;
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       unsigned long sp = regs->ctx.AX[0].U0;
+       struct metag_frame __user *frame;
+
+       frame = (struct metag_frame __user *)sp;
+
+       --frame;
+
+       while ((entry->nr < PERF_MAX_STACK_DEPTH) && frame)
+               frame = user_backtrace(frame, entry);
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whilst unwinding the stackframe and is like a subroutine return, so we use
+ * the PC.
+ */
+static int
+callchain_trace(struct stackframe *fr,
+               void *data)
+{
+       struct perf_callchain_entry *entry = data;
+       perf_callchain_store(entry, fr->pc);
+       return 0;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       struct stackframe fr;
+
+       fr.fp = regs->ctx.AX[1].U0;
+       fr.sp = regs->ctx.AX[0].U0;
+       fr.lr = regs->ctx.DX[4].U1;
+       fr.pc = regs->ctx.CurrPC;
+       walk_stackframe(&fr, callchain_trace, entry);
+}
diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c
new file mode 100644 (file)
index 0000000..c6efe62
--- /dev/null
@@ -0,0 +1,461 @@
+/*
+ * Copyright (C) 2005,2006,2007,2008,2009,2010,2011 Imagination Technologies
+ *
+ * This file contains the architecture-dependent parts of process handling.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/unistd.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/reboot.h>
+#include <linux/elfcore.h>
+#include <linux/fs.h>
+#include <linux/tick.h>
+#include <linux/slab.h>
+#include <linux/mman.h>
+#include <linux/pm.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <asm/core_reg.h>
+#include <asm/user_gateway.h>
+#include <asm/tcm.h>
+#include <asm/traps.h>
+#include <asm/switch_to.h>
+
+/*
+ * Wait for the next interrupt and enable local interrupts
+ */
+static inline void arch_idle(void)
+{
+       int tmp;
+
+       /*
+        * Quickly jump straight into the interrupt entry point without actually
+        * triggering an interrupt. When TXSTATI gets read the processor will
+        * block until an interrupt is triggered.
+        */
+       asm volatile (/* Switch into ISTAT mode */
+                     "RTH\n\t"
+                     /* Enable local interrupts */
+                     "MOV      TXMASKI, %1\n\t"
+                     /*
+                      * We can't directly "SWAP PC, PCX", so we swap via a
+                      * temporary. Essentially we do:
+                      *  PCX_new = 1f (the place to continue execution)
+                      *  PC = PCX_old
+                      */
+                     "ADD      %0, CPC0, #(1f-.)\n\t"
+                     "SWAP     PCX, %0\n\t"
+                     "MOV      PC, %0\n"
+                     /* Continue execution here with interrupts enabled */
+                     "1:"
+                     : "=a" (tmp)
+                     : "r" (get_trigger_mask()));
+}
+
+void cpu_idle(void)
+{
+       set_thread_flag(TIF_POLLING_NRFLAG);
+
+       while (1) {
+               tick_nohz_idle_enter();
+               rcu_idle_enter();
+
+               while (!need_resched()) {
+                       /*
+                        * We need to disable interrupts here to ensure we don't
+                        * miss a wakeup call.
+                        */
+                       local_irq_disable();
+                       if (!need_resched()) {
+#ifdef CONFIG_HOTPLUG_CPU
+                               if (cpu_is_offline(smp_processor_id()))
+                                       cpu_die();
+#endif
+                               arch_idle();
+                       } else {
+                               local_irq_enable();
+                       }
+               }
+
+               rcu_idle_exit();
+               tick_nohz_idle_exit();
+               schedule_preempt_disabled();
+        }
+}
+
+void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
+
+void (*soc_restart)(char *cmd);
+void (*soc_halt)(void);
+
+void machine_restart(char *cmd)
+{
+       if (soc_restart)
+               soc_restart(cmd);
+       hard_processor_halt(HALT_OK);
+}
+
+void machine_halt(void)
+{
+       if (soc_halt)
+               soc_halt();
+       smp_send_stop();
+       hard_processor_halt(HALT_OK);
+}
+
+void machine_power_off(void)
+{
+       if (pm_power_off)
+               pm_power_off();
+       smp_send_stop();
+       hard_processor_halt(HALT_OK);
+}
+
+#define FLAG_Z 0x8
+#define FLAG_N 0x4
+#define FLAG_O 0x2
+#define FLAG_C 0x1
+
+void show_regs(struct pt_regs *regs)
+{
+       int i;
+       const char *AX0_names[] = {"A0StP", "A0FrP"};
+       const char *AX1_names[] = {"A1GbP", "A1LbP"};
+
+       const char *DX0_names[] = {
+               "D0Re0",
+               "D0Ar6",
+               "D0Ar4",
+               "D0Ar2",
+               "D0FrT",
+               "D0.5 ",
+               "D0.6 ",
+               "D0.7 "
+       };
+
+       const char *DX1_names[] = {
+               "D1Re0",
+               "D1Ar5",
+               "D1Ar3",
+               "D1Ar1",
+               "D1RtP",
+               "D1.5 ",
+               "D1.6 ",
+               "D1.7 "
+       };
+
+       pr_info(" pt_regs @ %p\n", regs);
+       pr_info(" SaveMask = 0x%04hx\n", regs->ctx.SaveMask);
+       pr_info(" Flags = 0x%04hx (%c%c%c%c)\n", regs->ctx.Flags,
+               regs->ctx.Flags & FLAG_Z ? 'Z' : 'z',
+               regs->ctx.Flags & FLAG_N ? 'N' : 'n',
+               regs->ctx.Flags & FLAG_O ? 'O' : 'o',
+               regs->ctx.Flags & FLAG_C ? 'C' : 'c');
+       pr_info(" TXRPT = 0x%08x\n", regs->ctx.CurrRPT);
+       pr_info(" PC = 0x%08x\n", regs->ctx.CurrPC);
+
+       /* AX regs */
+       for (i = 0; i < 2; i++) {
+               pr_info(" %s = 0x%08x    ",
+                       AX0_names[i],
+                       regs->ctx.AX[i].U0);
+               printk(" %s = 0x%08x\n",
+                       AX1_names[i],
+                       regs->ctx.AX[i].U1);
+       }
+
+       if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+               pr_warn(" Extended state present - AX2.[01] will be WRONG\n");
+
+       /* Special place with AXx.2 */
+       pr_info(" A0.2  = 0x%08x    ",
+               regs->ctx.Ext.AX2.U0);
+       printk(" A1.2  = 0x%08x\n",
+               regs->ctx.Ext.AX2.U1);
+
+       /* 'extended' AX regs (nominally, just AXx.3) */
+       for (i = 0; i < (TBICTX_AX_REGS - 3); i++) {
+               pr_info(" A0.%d  = 0x%08x    ", i + 3, regs->ctx.AX3[i].U0);
+               printk(" A1.%d  = 0x%08x\n", i + 3, regs->ctx.AX3[i].U1);
+       }
+
+       for (i = 0; i < 8; i++) {
+               pr_info(" %s = 0x%08x    ", DX0_names[i], regs->ctx.DX[i].U0);
+               printk(" %s = 0x%08x\n", DX1_names[i], regs->ctx.DX[i].U1);
+       }
+
+       show_trace(NULL, (unsigned long *)regs->ctx.AX[0].U0, regs);
+}
+
+int copy_thread(unsigned long clone_flags, unsigned long usp,
+               unsigned long arg, struct task_struct *tsk)
+{
+       struct pt_regs *childregs = task_pt_regs(tsk);
+       void *kernel_context = ((void *) childregs +
+                               sizeof(struct pt_regs));
+       unsigned long global_base;
+
+       BUG_ON(((unsigned long)childregs) & 0x7);
+       BUG_ON(((unsigned long)kernel_context) & 0x7);
+
+       memset(&tsk->thread.kernel_context, 0,
+                       sizeof(tsk->thread.kernel_context));
+
+       tsk->thread.kernel_context = __TBISwitchInit(kernel_context,
+                                                    ret_from_fork,
+                                                    0, 0);
+
+       if (unlikely(tsk->flags & PF_KTHREAD)) {
+               /*
+                * Make sure we don't leak any kernel data to child's regs
+                * if kernel thread becomes a userspace thread in the future
+                */
+               memset(childregs, 0 , sizeof(struct pt_regs));
+
+               global_base = __core_reg_get(A1GbP);
+               childregs->ctx.AX[0].U1 = (unsigned long) global_base;
+               childregs->ctx.AX[0].U0 = (unsigned long) kernel_context;
+               /* Set D1Ar1=arg and D1RtP=usp (fn) */
+               childregs->ctx.DX[4].U1 = usp;
+               childregs->ctx.DX[3].U1 = arg;
+               tsk->thread.int_depth = 2;
+               return 0;
+       }
+       /*
+        * Get a pointer to where the new child's register block should have
+        * been pushed.
+        * The Meta's stack grows upwards, and the context is the first
+        * thing to be pushed by TBX (phew)
+        */
+       *childregs = *current_pt_regs();
+       /* Set the correct stack for the clone mode */
+       if (usp)
+               childregs->ctx.AX[0].U0 = ALIGN(usp, 8);
+       tsk->thread.int_depth = 1;
+
+       /* set return value for child process */
+       childregs->ctx.DX[0].U0 = 0;
+
+       /* The TLS pointer is passed as an argument to sys_clone. */
+       if (clone_flags & CLONE_SETTLS)
+               tsk->thread.tls_ptr =
+                               (__force void __user *)childregs->ctx.DX[1].U1;
+
+#ifdef CONFIG_METAG_FPU
+       if (tsk->thread.fpu_context) {
+               struct meta_fpu_context *ctx;
+
+               ctx = kmemdup(tsk->thread.fpu_context,
+                             sizeof(struct meta_fpu_context), GFP_ATOMIC);
+               tsk->thread.fpu_context = ctx;
+       }
+#endif
+
+#ifdef CONFIG_METAG_DSP
+       if (tsk->thread.dsp_context) {
+               struct meta_ext_context *ctx;
+               int i;
+
+               ctx = kmemdup(tsk->thread.dsp_context,
+                             sizeof(struct meta_ext_context), GFP_ATOMIC);
+               for (i = 0; i < 2; i++)
+                       ctx->ram[i] = kmemdup(ctx->ram[i], ctx->ram_sz[i],
+                                             GFP_ATOMIC);
+               tsk->thread.dsp_context = ctx;
+       }
+#endif
+
+       return 0;
+}
+
+#ifdef CONFIG_METAG_FPU
+static void alloc_fpu_context(struct thread_struct *thread)
+{
+       thread->fpu_context = kzalloc(sizeof(struct meta_fpu_context),
+                                     GFP_ATOMIC);
+}
+
+static void clear_fpu(struct thread_struct *thread)
+{
+       thread->user_flags &= ~TBICTX_FPAC_BIT;
+       kfree(thread->fpu_context);
+       thread->fpu_context = NULL;
+}
+#else
+static void clear_fpu(struct thread_struct *thread)
+{
+}
+#endif
+
+#ifdef CONFIG_METAG_DSP
+static void clear_dsp(struct thread_struct *thread)
+{
+       if (thread->dsp_context) {
+               kfree(thread->dsp_context->ram[0]);
+               kfree(thread->dsp_context->ram[1]);
+
+               kfree(thread->dsp_context);
+
+               thread->dsp_context = NULL;
+       }
+
+       __core_reg_set(D0.8, 0);
+}
+#else
+static void clear_dsp(struct thread_struct *thread)
+{
+}
+#endif
+
+struct task_struct *__sched __switch_to(struct task_struct *prev,
+                                       struct task_struct *next)
+{
+       TBIRES to, from;
+
+       to.Switch.pCtx = next->thread.kernel_context;
+       to.Switch.pPara = prev;
+
+#ifdef CONFIG_METAG_FPU
+       if (prev->thread.user_flags & TBICTX_FPAC_BIT) {
+               struct pt_regs *regs = task_pt_regs(prev);
+               TBIRES state;
+
+               state.Sig.SaveMask = prev->thread.user_flags;
+               state.Sig.pCtx = &regs->ctx;
+
+               if (!prev->thread.fpu_context)
+                       alloc_fpu_context(&prev->thread);
+               if (prev->thread.fpu_context)
+                       __TBICtxFPUSave(state, prev->thread.fpu_context);
+       }
+       /*
+        * Force a restore of the FPU context next time this process is
+        * scheduled.
+        */
+       if (prev->thread.fpu_context)
+               prev->thread.fpu_context->needs_restore = true;
+#endif
+
+
+       from = __TBISwitch(to, &prev->thread.kernel_context);
+
+       /* Restore TLS pointer for this process. */
+       set_gateway_tls(current->thread.tls_ptr);
+
+       return (struct task_struct *) from.Switch.pPara;
+}
+
+void flush_thread(void)
+{
+       clear_fpu(&current->thread);
+       clear_dsp(&current->thread);
+}
+
+/*
+ * Free current thread data structures etc.
+ */
+void exit_thread(void)
+{
+       clear_fpu(&current->thread);
+       clear_dsp(&current->thread);
+}
+
+/* TODO: figure out how to unwind the kernel stack here to determine
+ * where we went to sleep. */
+unsigned long get_wchan(struct task_struct *p)
+{
+       return 0;
+}
+
+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
+{
+       /* Returning 0 indicates that the FPU state was not stored (as it was
+        * not in use) */
+       return 0;
+}
+
+#ifdef CONFIG_METAG_USER_TCM
+
+#define ELF_MIN_ALIGN  PAGE_SIZE
+
+#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
+#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
+#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
+
+#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
+
+unsigned long __metag_elf_map(struct file *filep, unsigned long addr,
+                             struct elf_phdr *eppnt, int prot, int type,
+                             unsigned long total_size)
+{
+       unsigned long map_addr, size;
+       unsigned long page_off = ELF_PAGEOFFSET(eppnt->p_vaddr);
+       unsigned long raw_size = eppnt->p_filesz + page_off;
+       unsigned long off = eppnt->p_offset - page_off;
+       unsigned int tcm_tag;
+       addr = ELF_PAGESTART(addr);
+       size = ELF_PAGEALIGN(raw_size);
+
+       /* mmap() will return -EINVAL if given a zero size, but a
+        * segment with zero filesize is perfectly valid */
+       if (!size)
+               return addr;
+
+       tcm_tag = tcm_lookup_tag(addr);
+
+       if (tcm_tag != TCM_INVALID_TAG)
+               type &= ~MAP_FIXED;
+
+       /*
+       * total_size is the size of the ELF (interpreter) image.
+       * The _first_ mmap needs to know the full size, otherwise
+       * randomization might put this image into an overlapping
+       * position with the ELF binary image. (since size < total_size)
+       * So we first map the 'big' image - and unmap the remainder at
+       * the end. (which unmap is needed for ELF images with holes.)
+       */
+       if (total_size) {
+               total_size = ELF_PAGEALIGN(total_size);
+               map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
+               if (!BAD_ADDR(map_addr))
+                       vm_munmap(map_addr+size, total_size-size);
+       } else
+               map_addr = vm_mmap(filep, addr, size, prot, type, off);
+
+       if (!BAD_ADDR(map_addr) && tcm_tag != TCM_INVALID_TAG) {
+               struct tcm_allocation *tcm;
+               unsigned long tcm_addr;
+
+               tcm = kmalloc(sizeof(*tcm), GFP_KERNEL);
+               if (!tcm)
+                       return -ENOMEM;
+
+               tcm_addr = tcm_alloc(tcm_tag, raw_size);
+               if (tcm_addr != addr) {
+                       kfree(tcm);
+                       return -ENOMEM;
+               }
+
+               tcm->tag = tcm_tag;
+               tcm->addr = tcm_addr;
+               tcm->size = raw_size;
+
+               list_add(&tcm->list, &current->mm->context.tcm);
+
+               eppnt->p_vaddr = map_addr;
+               if (copy_from_user((void *) addr, (void __user *) map_addr,
+                                  raw_size))
+                       return -EFAULT;
+       }
+
+       return map_addr;
+}
+#endif
diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c
new file mode 100644 (file)
index 0000000..47a8828
--- /dev/null
@@ -0,0 +1,380 @@
+/*
+ *  Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License.  See the file COPYING in the main directory of
+ * this archive for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/errno.h>
+#include <linux/ptrace.h>
+#include <linux/user.h>
+#include <linux/regset.h>
+#include <linux/tracehook.h>
+#include <linux/elf.h>
+#include <linux/uaccess.h>
+#include <trace/syscall.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+/*
+ * user_regset definitions.
+ */
+
+int metag_gp_regs_copyout(const struct pt_regs *regs,
+                         unsigned int pos, unsigned int count,
+                         void *kbuf, void __user *ubuf)
+{
+       const void *ptr;
+       unsigned long data;
+       int ret;
+
+       /* D{0-1}.{0-7} */
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 regs->ctx.DX, 0, 4*16);
+       if (ret)
+               goto out;
+       /* A{0-1}.{0-1} */
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 regs->ctx.AX, 4*16, 4*20);
+       if (ret)
+               goto out;
+       /* A{0-1}.2 */
+       if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+               ptr = regs->ctx.Ext.Ctx.pExt;
+       else
+               ptr = &regs->ctx.Ext.AX2;
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 ptr, 4*20, 4*22);
+       if (ret)
+               goto out;
+       /* A{0-1}.3 */
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 &regs->ctx.AX3, 4*22, 4*24);
+       if (ret)
+               goto out;
+       /* PC */
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 &regs->ctx.CurrPC, 4*24, 4*25);
+       if (ret)
+               goto out;
+       /* TXSTATUS */
+       data = (unsigned long)regs->ctx.Flags;
+       if (regs->ctx.SaveMask & TBICTX_CBUF_BIT)
+               data |= USER_GP_REGS_STATUS_CATCH_BIT;
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 &data, 4*25, 4*26);
+       if (ret)
+               goto out;
+       /* TXRPT, TXBPOBITS, TXMODE */
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 &regs->ctx.CurrRPT, 4*26, 4*29);
+       if (ret)
+               goto out;
+       /* Padding */
+       ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                      4*29, 4*30);
+out:
+       return ret;
+}
+
+int metag_gp_regs_copyin(struct pt_regs *regs,
+                        unsigned int pos, unsigned int count,
+                        const void *kbuf, const void __user *ubuf)
+{
+       void *ptr;
+       unsigned long data;
+       int ret;
+
+       /* D{0-1}.{0-7} */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                regs->ctx.DX, 0, 4*16);
+       if (ret)
+               goto out;
+       /* A{0-1}.{0-1} */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                regs->ctx.AX, 4*16, 4*20);
+       if (ret)
+               goto out;
+       /* A{0-1}.2 */
+       if (regs->ctx.SaveMask & TBICTX_XEXT_BIT)
+               ptr = regs->ctx.Ext.Ctx.pExt;
+       else
+               ptr = &regs->ctx.Ext.AX2;
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                ptr, 4*20, 4*22);
+       if (ret)
+               goto out;
+       /* A{0-1}.3 */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &regs->ctx.AX3, 4*22, 4*24);
+       if (ret)
+               goto out;
+       /* PC */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &regs->ctx.CurrPC, 4*24, 4*25);
+       if (ret)
+               goto out;
+       /* TXSTATUS */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &data, 4*25, 4*26);
+       if (ret)
+               goto out;
+       regs->ctx.Flags = data & 0xffff;
+       if (data & USER_GP_REGS_STATUS_CATCH_BIT)
+               regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBUF_BIT;
+       else
+               regs->ctx.SaveMask &= ~TBICTX_CBUF_BIT;
+       /* TXRPT, TXBPOBITS, TXMODE */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &regs->ctx.CurrRPT, 4*26, 4*29);
+out:
+       return ret;
+}
+
+static int metag_gp_regs_get(struct task_struct *target,
+                            const struct user_regset *regset,
+                            unsigned int pos, unsigned int count,
+                            void *kbuf, void __user *ubuf)
+{
+       const struct pt_regs *regs = task_pt_regs(target);
+       return metag_gp_regs_copyout(regs, pos, count, kbuf, ubuf);
+}
+
+static int metag_gp_regs_set(struct task_struct *target,
+                            const struct user_regset *regset,
+                            unsigned int pos, unsigned int count,
+                            const void *kbuf, const void __user *ubuf)
+{
+       struct pt_regs *regs = task_pt_regs(target);
+       return metag_gp_regs_copyin(regs, pos, count, kbuf, ubuf);
+}
+
+int metag_cb_regs_copyout(const struct pt_regs *regs,
+                         unsigned int pos, unsigned int count,
+                         void *kbuf, void __user *ubuf)
+{
+       int ret;
+
+       /* TXCATCH{0-3} */
+       if (regs->ctx.SaveMask & TBICTX_XCBF_BIT)
+               ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                         regs->extcb0, 0, 4*4);
+       else
+               ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                              0, 4*4);
+       return ret;
+}
+
+int metag_cb_regs_copyin(struct pt_regs *regs,
+                        unsigned int pos, unsigned int count,
+                        const void *kbuf, const void __user *ubuf)
+{
+       int ret;
+
+       /* TXCATCH{0-3} */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                regs->extcb0, 0, 4*4);
+       return ret;
+}
+
+static int metag_cb_regs_get(struct task_struct *target,
+                            const struct user_regset *regset,
+                            unsigned int pos, unsigned int count,
+                            void *kbuf, void __user *ubuf)
+{
+       const struct pt_regs *regs = task_pt_regs(target);
+       return metag_cb_regs_copyout(regs, pos, count, kbuf, ubuf);
+}
+
+static int metag_cb_regs_set(struct task_struct *target,
+                            const struct user_regset *regset,
+                            unsigned int pos, unsigned int count,
+                            const void *kbuf, const void __user *ubuf)
+{
+       struct pt_regs *regs = task_pt_regs(target);
+       return metag_cb_regs_copyin(regs, pos, count, kbuf, ubuf);
+}
+
+int metag_rp_state_copyout(const struct pt_regs *regs,
+                          unsigned int pos, unsigned int count,
+                          void *kbuf, void __user *ubuf)
+{
+       unsigned long mask;
+       u64 *ptr;
+       int ret, i;
+
+       /* Empty read pipeline */
+       if (!(regs->ctx.SaveMask & TBICTX_CBRP_BIT)) {
+               ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                              0, 4*13);
+               goto out;
+       }
+
+       mask = (regs->ctx.CurrDIVTIME & TXDIVTIME_RPMASK_BITS) >>
+               TXDIVTIME_RPMASK_S;
+
+       /* Read pipeline entries */
+       ptr = (void *)&regs->extcb0[1];
+       for (i = 0; i < 6; ++i, ++ptr) {
+               if (mask & (1 << i))
+                       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                                 ptr, 8*i, 8*(i + 1));
+               else
+                       ret = user_regset_copyout_zero(&pos, &count, &kbuf,
+                                                      &ubuf, 8*i, 8*(i + 1));
+               if (ret)
+                       goto out;
+       }
+       /* Mask of entries */
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 &mask, 4*12, 4*13);
+out:
+       return ret;
+}
+
+int metag_rp_state_copyin(struct pt_regs *regs,
+                         unsigned int pos, unsigned int count,
+                         const void *kbuf, const void __user *ubuf)
+{
+       struct user_rp_state rp;
+       unsigned long long *ptr;
+       int ret, i;
+
+       /* Read the entire pipeline before making any changes */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &rp, 0, 4*13);
+       if (ret)
+               goto out;
+
+       /* Write pipeline entries */
+       ptr = (void *)&regs->extcb0[1];
+       for (i = 0; i < 6; ++i, ++ptr)
+               if (rp.mask & (1 << i))
+                       *ptr = rp.entries[i];
+
+       /* Update RPMask in TXDIVTIME */
+       regs->ctx.CurrDIVTIME &= ~TXDIVTIME_RPMASK_BITS;
+       regs->ctx.CurrDIVTIME |= (rp.mask << TXDIVTIME_RPMASK_S)
+                                & TXDIVTIME_RPMASK_BITS;
+
+       /* Set/clear flags to indicate catch/read pipeline state */
+       if (rp.mask)
+               regs->ctx.SaveMask |= TBICTX_XCBF_BIT | TBICTX_CBRP_BIT;
+       else
+               regs->ctx.SaveMask &= ~TBICTX_CBRP_BIT;
+out:
+       return ret;
+}
+
+static int metag_rp_state_get(struct task_struct *target,
+                             const struct user_regset *regset,
+                             unsigned int pos, unsigned int count,
+                             void *kbuf, void __user *ubuf)
+{
+       const struct pt_regs *regs = task_pt_regs(target);
+       return metag_rp_state_copyout(regs, pos, count, kbuf, ubuf);
+}
+
+static int metag_rp_state_set(struct task_struct *target,
+                             const struct user_regset *regset,
+                             unsigned int pos, unsigned int count,
+                             const void *kbuf, const void __user *ubuf)
+{
+       struct pt_regs *regs = task_pt_regs(target);
+       return metag_rp_state_copyin(regs, pos, count, kbuf, ubuf);
+}
+
+enum metag_regset {
+       REGSET_GENERAL,
+       REGSET_CBUF,
+       REGSET_READPIPE,
+};
+
+static const struct user_regset metag_regsets[] = {
+       [REGSET_GENERAL] = {
+               .core_note_type = NT_PRSTATUS,
+               .n = ELF_NGREG,
+               .size = sizeof(long),
+               .align = sizeof(long long),
+               .get = metag_gp_regs_get,
+               .set = metag_gp_regs_set,
+       },
+       [REGSET_CBUF] = {
+               .core_note_type = NT_METAG_CBUF,
+               .n = sizeof(struct user_cb_regs) / sizeof(long),
+               .size = sizeof(long),
+               .align = sizeof(long long),
+               .get = metag_cb_regs_get,
+               .set = metag_cb_regs_set,
+       },
+       [REGSET_READPIPE] = {
+               .core_note_type = NT_METAG_RPIPE,
+               .n = sizeof(struct user_rp_state) / sizeof(long),
+               .size = sizeof(long),
+               .align = sizeof(long long),
+               .get = metag_rp_state_get,
+               .set = metag_rp_state_set,
+       },
+};
+
+static const struct user_regset_view user_metag_view = {
+       .name = "metag",
+       .e_machine = EM_METAG,
+       .regsets = metag_regsets,
+       .n = ARRAY_SIZE(metag_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+       return &user_metag_view;
+}
+
+/*
+ * Called by kernel/ptrace.c when detaching..
+ *
+ * Make sure single step bits etc are not set.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+       /* nothing to do.. */
+}
+
+long arch_ptrace(struct task_struct *child, long request, unsigned long addr,
+                unsigned long data)
+{
+       int ret;
+
+       switch (request) {
+       default:
+               ret = ptrace_request(child, request, addr, data);
+               break;
+       }
+
+       return ret;
+}
+
+int syscall_trace_enter(struct pt_regs *regs)
+{
+       int ret = 0;
+
+       if (test_thread_flag(TIF_SYSCALL_TRACE))
+               ret = tracehook_report_syscall_entry(regs);
+
+       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+               trace_sys_enter(regs, regs->ctx.DX[0].U1);
+
+       return ret ? -1 : regs->ctx.DX[0].U1;
+}
+
+void syscall_trace_leave(struct pt_regs *regs)
+{
+       if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+               trace_sys_exit(regs, regs->ctx.DX[0].U1);
+
+       if (test_thread_flag(TIF_SYSCALL_TRACE))
+               tracehook_report_syscall_exit(regs, 0);
+}
diff --git a/arch/metag/kernel/setup.c b/arch/metag/kernel/setup.c
new file mode 100644 (file)
index 0000000..8792461
--- /dev/null
@@ -0,0 +1,631 @@
+/*
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * This file contains the architecture-dependent parts of system setup.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/bootmem.h>
+#include <linux/console.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/init.h>
+#include <linux/initrd.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/of_fdt.h>
+#include <linux/pfn.h>
+#include <linux/root_dev.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/start_kernel.h>
+#include <linux/string.h>
+
+#include <asm/cachepart.h>
+#include <asm/clock.h>
+#include <asm/core_reg.h>
+#include <asm/cpu.h>
+#include <asm/da.h>
+#include <asm/highmem.h>
+#include <asm/hwthread.h>
+#include <asm/l2cache.h>
+#include <asm/mach/arch.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+#include <asm/mmu.h>
+#include <asm/mmzone.h>
+#include <asm/processor.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/traps.h>
+
+/* Priv protect as many registers as possible. */
+#define DEFAULT_PRIV   (TXPRIVEXT_COPRO_BITS           | \
+                        TXPRIVEXT_TXTRIGGER_BIT        | \
+                        TXPRIVEXT_TXGBLCREG_BIT        | \
+                        TXPRIVEXT_ILOCK_BIT            | \
+                        TXPRIVEXT_TXITACCYC_BIT        | \
+                        TXPRIVEXT_TXDIVTIME_BIT        | \
+                        TXPRIVEXT_TXAMAREGX_BIT        | \
+                        TXPRIVEXT_TXTIMERI_BIT         | \
+                        TXPRIVEXT_TXSTATUS_BIT         | \
+                        TXPRIVEXT_TXDISABLE_BIT)
+
+/* Meta2 specific bits. */
+#ifdef CONFIG_METAG_META12
+#define META2_PRIV     0
+#else
+#define META2_PRIV     (TXPRIVEXT_TXTIMER_BIT          | \
+                        TXPRIVEXT_TRACE_BIT)
+#endif
+
+/* Unaligned access checking bits. */
+#ifdef CONFIG_METAG_UNALIGNED
+#define UNALIGNED_PRIV TXPRIVEXT_ALIGNREW_BIT
+#else
+#define UNALIGNED_PRIV 0
+#endif
+
+#define PRIV_BITS      (DEFAULT_PRIV                   | \
+                        META2_PRIV                     | \
+                        UNALIGNED_PRIV)
+
+/*
+ * Protect access to:
+ * 0x06000000-0x07ffffff Direct mapped region
+ * 0x05000000-0x05ffffff MMU table region (Meta1)
+ * 0x04400000-0x047fffff Cache flush region
+ * 0x84000000-0x87ffffff Core cache memory region (Meta2)
+ *
+ * Allow access to:
+ * 0x80000000-0x81ffffff Core code memory region (Meta2)
+ */
+#ifdef CONFIG_METAG_META12
+#define PRIVSYSR_BITS  TXPRIVSYSR_ALL_BITS
+#else
+#define PRIVSYSR_BITS  (TXPRIVSYSR_ALL_BITS & ~TXPRIVSYSR_CORECODE_BIT)
+#endif
+
+/* Protect all 0x02xxxxxx and 0x048xxxxx. */
+#define PIOREG_BITS    0xffffffff
+
+/*
+ * Protect all 0x04000xx0 (system events)
+ * except write combiner flush and write fence (system events 4 and 5).
+ */
+#define PSYREG_BITS    0xfffffffb
+
+
+extern char _heap_start[];
+
+#ifdef CONFIG_METAG_BUILTIN_DTB
+extern u32 __dtb_start[];
+#endif
+
+#ifdef CONFIG_DA_CONSOLE
+/* Our early channel based console driver */
+extern struct console dash_console;
+#endif
+
+struct machine_desc *machine_desc __initdata;
+
+/*
+ * Map a Linux CPU number to a hardware thread ID
+ * In SMP this will be set up with the correct mapping at startup; in UP this
+ * will map to the HW thread on which we are running.
+ */
+u8 cpu_2_hwthread_id[NR_CPUS] __read_mostly = {
+       [0 ... NR_CPUS-1] = BAD_HWTHREAD_ID
+};
+
+/*
+ * Map a hardware thread ID to a Linux CPU number
+ * In SMP this will be fleshed out with the correct CPU ID for a particular
+ * hardware thread. In UP this will be initialised with the boot CPU ID.
+ */
+u8 hwthread_id_2_cpu[4] __read_mostly = {
+       [0 ... 3] = BAD_CPU_ID
+};
+
+/* The relative offset of the MMU mapped memory (from ldlk or bootloader)
+ * to the real physical memory.  This is needed as we have to use the
+ * physical addresses in the MMU tables (pte entries), and not the virtual
+ * addresses.
+ * This variable is used in the __pa() and __va() macros, and should
+ * probably only be used via them.
+ */
+unsigned int meta_memoffset;
+EXPORT_SYMBOL(meta_memoffset);
+
+static char __initdata *original_cmd_line;
+
+DEFINE_PER_CPU(PTBI, pTBI);
+
+/*
+ * Mappings are specified as "CPU_ID:HWTHREAD_ID", e.g.
+ *
+ *     "hwthread_map=0:1,1:2,2:3,3:0"
+ *
+ *     Linux CPU ID    HWTHREAD_ID
+ *     ---------------------------
+ *         0                 1
+ *         1                 2
+ *         2                 3
+ *         3                 0
+ */
+static int __init parse_hwthread_map(char *p)
+{
+       int cpu;
+
+       while (*p) {
+               cpu = (*p++) - '0';
+               if (cpu < 0 || cpu > 9)
+                       goto err_cpu;
+
+               p++;            /* skip colon */
+               cpu_2_hwthread_id[cpu] = (*p++) - '0';
+               if (cpu_2_hwthread_id[cpu] >= 4)
+                       goto err_thread;
+               hwthread_id_2_cpu[cpu_2_hwthread_id[cpu]] = cpu;
+
+               if (*p == ',')
+                       p++;            /* skip comma */
+       }
+
+       return 0;
+err_cpu:
+       pr_err("%s: hwthread_map cpu argument out of range\n", __func__);
+       return -EINVAL;
+err_thread:
+       pr_err("%s: hwthread_map thread argument out of range\n", __func__);
+       return -EINVAL;
+}
+early_param("hwthread_map", parse_hwthread_map);
+
+void __init dump_machine_table(void)
+{
+       struct machine_desc *p;
+       const char **compat;
+
+       pr_info("Available machine support:\n\tNAME\t\tCOMPATIBLE LIST\n");
+       for_each_machine_desc(p) {
+               pr_info("\t%s\t[", p->name);
+               for (compat = p->dt_compat; compat && *compat; ++compat)
+                       printk(" '%s'", *compat);
+               printk(" ]\n");
+       }
+
+       pr_info("\nPlease check your kernel config and/or bootloader.\n");
+
+       hard_processor_halt(HALT_PANIC);
+}
+
+#ifdef CONFIG_METAG_HALT_ON_PANIC
+static int metag_panic_event(struct notifier_block *this, unsigned long event,
+                            void *ptr)
+{
+       hard_processor_halt(HALT_PANIC);
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block metag_panic_block = {
+       metag_panic_event,
+       NULL,
+       0
+};
+#endif
+
+void __init setup_arch(char **cmdline_p)
+{
+       unsigned long start_pfn;
+       unsigned long text_start = (unsigned long)(&_stext);
+       unsigned long cpu = smp_processor_id();
+       unsigned long heap_start, heap_end;
+       unsigned long start_pte;
+       PTBI _pTBI;
+       PTBISEG p_heap;
+       int heap_id, i;
+
+       metag_cache_probe();
+
+       metag_da_probe();
+#ifdef CONFIG_DA_CONSOLE
+       if (metag_da_enabled()) {
+               /* An early channel based console driver */
+               register_console(&dash_console);
+               add_preferred_console("ttyDA", 1, NULL);
+       }
+#endif
+
+       /* try interpreting the argument as a device tree */
+       machine_desc = setup_machine_fdt(original_cmd_line);
+       /* if it doesn't look like a device tree it must be a command line */
+       if (!machine_desc) {
+#ifdef CONFIG_METAG_BUILTIN_DTB
+               /* try the embedded device tree */
+               machine_desc = setup_machine_fdt(__dtb_start);
+               if (!machine_desc)
+                       panic("Invalid embedded device tree.");
+#else
+               /* use the default machine description */
+               machine_desc = default_machine_desc();
+#endif
+#ifndef CONFIG_CMDLINE_FORCE
+               /* append the bootloader cmdline to any builtin fdt cmdline */
+               if (boot_command_line[0] && original_cmd_line[0])
+                       strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+               strlcat(boot_command_line, original_cmd_line,
+                       COMMAND_LINE_SIZE);
+#endif
+       }
+       setup_meta_clocks(machine_desc->clocks);
+
+       *cmdline_p = boot_command_line;
+       parse_early_param();
+
+       /*
+        * Make sure we don't alias in dcache or icache
+        */
+       check_for_cache_aliasing(cpu);
+
+
+#ifdef CONFIG_METAG_HALT_ON_PANIC
+       atomic_notifier_chain_register(&panic_notifier_list,
+                                      &metag_panic_block);
+#endif
+
+#ifdef CONFIG_DUMMY_CONSOLE
+       conswitchp = &dummy_con;
+#endif
+
+       if (!(__core_reg_get(TXSTATUS) & TXSTATUS_PSTAT_BIT))
+               panic("Privilege must be enabled for this thread.");
+
+       _pTBI = __TBI(TBID_ISTAT_BIT);
+
+       per_cpu(pTBI, cpu) = _pTBI;
+
+       if (!per_cpu(pTBI, cpu))
+               panic("No TBI found!");
+
+       /*
+        * Initialize all interrupt vectors to our copy of __TBIUnExpXXX,
+        * rather than the version from the bootloader. This makes call
+        * stacks easier to understand and may allow us to unmap the
+        * bootloader at some point.
+        *
+        * We need to keep the LWK handler that TBI installed in order to
+        * be able to do inter-thread comms.
+        */
+       for (i = 0; i <= TBID_SIGNUM_MAX; i++)
+               if (i != TBID_SIGNUM_LWK)
+                       _pTBI->fnSigs[i] = __TBIUnExpXXX;
+
+       /* A Meta requirement is that the kernel is loaded (virtually)
+        * at the PAGE_OFFSET.
+        */
+       if (PAGE_OFFSET != text_start)
+               panic("Kernel not loaded at PAGE_OFFSET (%#x) but at %#lx.",
+                     PAGE_OFFSET, text_start);
+
+       start_pte = mmu_read_second_level_page(text_start);
+
+       /*
+        * Kernel pages should have the PRIV bit set by the bootloader.
+        */
+       if (!(start_pte & _PAGE_KERNEL))
+               panic("kernel pte does not have PRIV set");
+
+       /*
+        * See __pa and __va in include/asm/page.h.
+        * This value is negative when running in local space but the
+        * calculations work anyway.
+        */
+       meta_memoffset = text_start - (start_pte & PAGE_MASK);
+
+       /* Now let's look at the heap space */
+       heap_id = (__TBIThreadId() & TBID_THREAD_BITS)
+               + TBID_SEG(0, TBID_SEGSCOPE_LOCAL, TBID_SEGTYPE_HEAP);
+
+       p_heap = __TBIFindSeg(NULL, heap_id);
+
+       if (!p_heap)
+               panic("Could not find heap from TBI!");
+
+       /* The heap begins at the first full page after the kernel data. */
+       heap_start = (unsigned long) &_heap_start;
+
+       /* The heap ends at the end of the heap segment specified with
+        * ldlk.
+        */
+       if (is_global_space(text_start)) {
+               pr_debug("WARNING: running in global space!\n");
+               heap_end = (unsigned long)p_heap->pGAddr + p_heap->Bytes;
+       } else {
+               heap_end = (unsigned long)p_heap->pLAddr + p_heap->Bytes;
+       }
+
+       ROOT_DEV = Root_RAM0;
+
+       /* init_mm is the mm struct used for the first task.  It is then
+        * cloned for all other tasks spawned from that task.
+        *
+        * Note - we are using the virtual addresses here.
+        */
+       init_mm.start_code = (unsigned long)(&_stext);
+       init_mm.end_code = (unsigned long)(&_etext);
+       init_mm.end_data = (unsigned long)(&_edata);
+       init_mm.brk = (unsigned long)heap_start;
+
+       min_low_pfn = PFN_UP(__pa(text_start));
+       max_low_pfn = PFN_DOWN(__pa(heap_end));
+
+       pfn_base = min_low_pfn;
+
+       /* Round max_pfn up to a 4Mb boundary. The free_bootmem_node()
+        * call later makes sure to keep the rounded up pages marked reserved.
+        */
+       max_pfn = max_low_pfn + ((1 << MAX_ORDER) - 1);
+       max_pfn &= ~((1 << MAX_ORDER) - 1);
+
+       start_pfn = PFN_UP(__pa(heap_start));
+
+       if (min_low_pfn & ((1 << MAX_ORDER) - 1)) {
+               /* Theoretically, we could expand the space that the
+                * bootmem allocator covers - much as we do for the
+                * 'high' address, and then tell the bootmem system
+                * that the lowest chunk is 'not available'.  Right
+                * now it is just much easier to constrain the
+                * user to always MAX_ORDER align their kernel space.
+                */
+
+               panic("Kernel must be %d byte aligned, currently at %#lx.",
+                     1 << (MAX_ORDER + PAGE_SHIFT),
+                     min_low_pfn << PAGE_SHIFT);
+       }
+
+#ifdef CONFIG_HIGHMEM
+       highstart_pfn = highend_pfn = max_pfn;
+       high_memory = (void *) __va(PFN_PHYS(highstart_pfn));
+#else
+       high_memory = (void *)__va(PFN_PHYS(max_pfn));
+#endif
+
+       paging_init(heap_end);
+
+       setup_priv();
+
+       /* Setup the boot cpu's mapping. The rest will be setup below. */
+       cpu_2_hwthread_id[smp_processor_id()] = hard_processor_id();
+       hwthread_id_2_cpu[hard_processor_id()] = smp_processor_id();
+
+       /* Copy device tree blob into non-init memory before unflattening */
+       copy_fdt();
+       unflatten_device_tree();
+
+#ifdef CONFIG_SMP
+       smp_init_cpus();
+#endif
+
+       if (machine_desc->init_early)
+               machine_desc->init_early();
+}
+
+static int __init customize_machine(void)
+{
+       /* customizes platform devices, or adds new ones */
+       if (machine_desc->init_machine)
+               machine_desc->init_machine();
+       return 0;
+}
+arch_initcall(customize_machine);
+
+static int __init init_machine_late(void)
+{
+       if (machine_desc->init_late)
+               machine_desc->init_late();
+       return 0;
+}
+late_initcall(init_machine_late);
+
+#ifdef CONFIG_PROC_FS
+/*
+ *     Get CPU information for use by the procfs.
+ */
+static const char *get_cpu_capabilities(unsigned int txenable)
+{
+#ifdef CONFIG_METAG_META21
+       /* See CORE_ID in META HTP.GP TRM - Architecture Overview 2.1.238 */
+       int coreid = metag_in32(METAC_CORE_ID);
+       unsigned int dsp_type = (coreid >> 3) & 7;
+       unsigned int fpu_type = (coreid >> 7) & 3;
+
+       switch (dsp_type | fpu_type << 3) {
+       case (0x00): return "EDSP";
+       case (0x01): return "DSP";
+       case (0x08): return "EDSP+LFPU";
+       case (0x09): return "DSP+LFPU";
+       case (0x10): return "EDSP+FPU";
+       case (0x11): return "DSP+FPU";
+       }
+       return "UNKNOWN";
+
+#else
+       if (!(txenable & TXENABLE_CLASS_BITS))
+               return "DSP";
+       else
+               return "";
+#endif
+}
+
+static int show_cpuinfo(struct seq_file *m, void *v)
+{
+       const char *cpu;
+       unsigned int txenable, thread_id, major, minor;
+       unsigned long clockfreq = get_coreclock();
+#ifdef CONFIG_SMP
+       int i;
+       unsigned long lpj;
+#endif
+
+       cpu = "META";
+
+       txenable = __core_reg_get(TXENABLE);
+       major = (txenable & TXENABLE_MAJOR_REV_BITS) >> TXENABLE_MAJOR_REV_S;
+       minor = (txenable & TXENABLE_MINOR_REV_BITS) >> TXENABLE_MINOR_REV_S;
+       thread_id = (txenable >> 8) & 0x3;
+
+#ifdef CONFIG_SMP
+       for_each_online_cpu(i) {
+               lpj = per_cpu(cpu_data, i).loops_per_jiffy;
+               txenable = core_reg_read(TXUCT_ID, TXENABLE_REGNUM,
+                                                       cpu_2_hwthread_id[i]);
+
+               seq_printf(m, "CPU:\t\t%s %d.%d (thread %d)\n"
+                             "Clocking:\t%lu.%1luMHz\n"
+                             "BogoMips:\t%lu.%02lu\n"
+                             "Calibration:\t%lu loops\n"
+                             "Capabilities:\t%s\n\n",
+                             cpu, major, minor, i,
+                             clockfreq / 1000000, (clockfreq / 100000) % 10,
+                             lpj / (500000 / HZ), (lpj / (5000 / HZ)) % 100,
+                             lpj,
+                             get_cpu_capabilities(txenable));
+       }
+#else
+       seq_printf(m, "CPU:\t\t%s %d.%d (thread %d)\n"
+                  "Clocking:\t%lu.%1luMHz\n"
+                  "BogoMips:\t%lu.%02lu\n"
+                  "Calibration:\t%lu loops\n"
+                  "Capabilities:\t%s\n",
+                  cpu, major, minor, thread_id,
+                  clockfreq / 1000000, (clockfreq / 100000) % 10,
+                  loops_per_jiffy / (500000 / HZ),
+                  (loops_per_jiffy / (5000 / HZ)) % 100,
+                  loops_per_jiffy,
+                  get_cpu_capabilities(txenable));
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_METAG_L2C
+       if (meta_l2c_is_present()) {
+               seq_printf(m, "L2 cache:\t%s\n"
+                             "L2 cache size:\t%d KB\n",
+                             meta_l2c_is_enabled() ? "enabled" : "disabled",
+                             meta_l2c_size() >> 10);
+       }
+#endif
+       return 0;
+}
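+/*
+ * For illustration, the format strings above produce /proc/cpuinfo output
+ * along these lines (all values are hypothetical):
+ *
+ *     CPU:            META 2.1 (thread 0)
+ *     Clocking:       300.0MHz
+ *     BogoMips:       598.01
+ *     Calibration:    2990080 loops
+ *     Capabilities:   EDSP+FPU
+ */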
+
+static void *c_start(struct seq_file *m, loff_t *pos)
+{
+       return (void *)(*pos == 0);
+}
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       return NULL;
+}
+static void c_stop(struct seq_file *m, void *v)
+{
+}
+const struct seq_operations cpuinfo_op = {
+       .start = c_start,
+       .next  = c_next,
+       .stop  = c_stop,
+       .show  = show_cpuinfo,
+};
+#endif /* CONFIG_PROC_FS */
+
+void __init metag_start_kernel(char *args)
+{
+       /* Zero the timer register so timestamps are from the point at
+        * which the kernel started running.
+        */
+       __core_reg_set(TXTIMER, 0);
+
+       /* Clear the bss. */
+       memset(__bss_start, 0,
+              (unsigned long)__bss_stop - (unsigned long)__bss_start);
+
+       /* Remember where these are for use in setup_arch */
+       original_cmd_line = args;
+
+       current_thread_info()->cpu = hard_processor_id();
+
+       start_kernel();
+}
+
+/**
+ * setup_priv() - Set up privilege protection registers.
+ *
+ * Set up privilege protection registers such as TXPRIVEXT to prevent userland
+ * from touching our precious registers and sensitive memory areas.
+ */
+void setup_priv(void)
+{
+       unsigned int offset = hard_processor_id() << TXPRIVREG_STRIDE_S;
+
+       __core_reg_set(TXPRIVEXT, PRIV_BITS);
+
+       metag_out32(PRIVSYSR_BITS, T0PRIVSYSR + offset);
+       metag_out32(PIOREG_BITS,   T0PIOREG   + offset);
+       metag_out32(PSYREG_BITS,   T0PSYREG   + offset);
+}
+
+PTBI pTBI_get(unsigned int cpu)
+{
+       return per_cpu(pTBI, cpu);
+}
+EXPORT_SYMBOL(pTBI_get);
+
+#if defined(CONFIG_METAG_DSP) && defined(CONFIG_METAG_FPU)
+char capabilites[] = "dsp fpu";
+#elif defined(CONFIG_METAG_DSP)
+char capabilites[] = "dsp";
+#elif defined(CONFIG_METAG_FPU)
+char capabilites[] = "fpu";
+#else
+char capabilites[] = "";
+#endif
+
+static struct ctl_table caps_kern_table[] = {
+       {
+               .procname       = "capabilities",
+               .data           = capabilites,
+               .maxlen         = sizeof(capabilites),
+               .mode           = 0444,
+               .proc_handler   = proc_dostring,
+       },
+       {}
+};
+
+static struct ctl_table caps_root_table[] = {
+       {
+               .procname       = "kernel",
+               .mode           = 0555,
+               .child          = caps_kern_table,
+       },
+       {}
+};
+
+static int __init capabilities_register_sysctl(void)
+{
+       struct ctl_table_header *caps_table_header;
+
+       caps_table_header = register_sysctl_table(caps_root_table);
+       if (!caps_table_header) {
+               pr_err("Unable to register CAPABILITIES sysctl\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+core_initcall(capabilities_register_sysctl);
diff --git a/arch/metag/kernel/signal.c b/arch/metag/kernel/signal.c
new file mode 100644 (file)
index 0000000..3be61cf
--- /dev/null
@@ -0,0 +1,344 @@
+/*
+ *  Copyright (C) 1991,1992  Linus Torvalds
+ *  Copyright (C) 2005-2012  Imagination Technologies Ltd.
+ *
+ *  1997-11-28  Modified for POSIX.1b signals by Richard Henderson
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/kernel.h>
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/wait.h>
+#include <linux/ptrace.h>
+#include <linux/unistd.h>
+#include <linux/stddef.h>
+#include <linux/personality.h>
+#include <linux/uaccess.h>
+#include <linux/tracehook.h>
+
+#include <asm/ucontext.h>
+#include <asm/cacheflush.h>
+#include <asm/switch.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
+
+#define REG_FLAGS      ctx.SaveMask
+#define REG_RETVAL     ctx.DX[0].U0
+#define REG_SYSCALL    ctx.DX[0].U1
+#define REG_SP         ctx.AX[0].U0
+#define REG_ARG1       ctx.DX[3].U1
+#define REG_ARG2       ctx.DX[3].U0
+#define REG_ARG3       ctx.DX[2].U1
+#define REG_PC         ctx.CurrPC
+#define REG_RTP                ctx.DX[4].U1
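+/*
+ * These aliases name the pt_regs fields after the Meta registers they hold
+ * (cf. the register names printed by show_regs() in process.c): REG_RETVAL
+ * is D0Re0, REG_SYSCALL is D1Re0, REG_SP is A0StP, REG_ARG1-3 are
+ * D1Ar1/D0Ar2/D1Ar3 and REG_RTP is D1RtP.
+ */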
+
+struct rt_sigframe {
+       struct siginfo info;
+       struct ucontext uc;
+       unsigned long retcode[2];
+};
+
+static int restore_sigcontext(struct pt_regs *regs,
+                             struct sigcontext __user *sc)
+{
+       int err;
+
+       /* Always make any pending restarted system calls return -EINTR */
+       current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+       err = metag_gp_regs_copyin(regs, 0, sizeof(struct user_gp_regs), NULL,
+                                  &sc->regs);
+       if (!err)
+               err = metag_cb_regs_copyin(regs, 0,
+                                          sizeof(struct user_cb_regs), NULL,
+                                          &sc->cb);
+       if (!err)
+               err = metag_rp_state_copyin(regs, 0,
+                                           sizeof(struct user_rp_state), NULL,
+                                           &sc->rp);
+
+       /* This is a user-mode context. */
+       regs->REG_FLAGS |= TBICTX_PRIV_BIT;
+
+       return err;
+}
+
+long sys_rt_sigreturn(void)
+{
+       /* NOTE - Meta stack goes UPWARDS - so we wind the stack back */
+       struct pt_regs *regs = current_pt_regs();
+       struct rt_sigframe __user *frame;
+       sigset_t set;
+
+       frame = (__force struct rt_sigframe __user *)(regs->REG_SP -
+                                                     sizeof(*frame));
+
+       if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
+               goto badframe;
+
+       if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
+               goto badframe;
+
+       set_current_blocked(&set);
+
+       if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+               goto badframe;
+
+       if (restore_altstack(&frame->uc.uc_stack))
+               goto badframe;
+
+       return regs->REG_RETVAL;
+
+badframe:
+       force_sig(SIGSEGV, current);
+
+       return 0;
+}
+
+static int setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
+                           unsigned long mask)
+{
+       int err;
+
+       err = metag_gp_regs_copyout(regs, 0, sizeof(struct user_gp_regs), NULL,
+                                   &sc->regs);
+
+       if (!err)
+               err = metag_cb_regs_copyout(regs, 0,
+                                           sizeof(struct user_cb_regs), NULL,
+                                           &sc->cb);
+       if (!err)
+               err = metag_rp_state_copyout(regs, 0,
+                                            sizeof(struct user_rp_state), NULL,
+                                            &sc->rp);
+
+       /* OK, clear that cbuf flag in the old context, or our stored
+        * catch buffer will be restored when we go to call the signal
+        * handler. Also clear out the CBRP RA/RD pipe bit in case
+        * that is pending as well!
+        * Note that as we have already stored this context, these
+        * flags will get restored on sigreturn to their original
+        * state.
+        */
+       regs->REG_FLAGS &= ~(TBICTX_XCBF_BIT | TBICTX_CBUF_BIT |
+                            TBICTX_CBRP_BIT);
+
+       /* Clear out the LSM_STEP bits in case we are in the middle of
+        * an MSET/MGET.
+        */
+       regs->ctx.Flags &= ~TXSTATUS_LSM_STEP_BITS;
+
+       err |= __put_user(mask, &sc->oldmask);
+
+       return err;
+}
+
+/*
+ * Determine which stack to use..
+ */
+static void __user *get_sigframe(struct k_sigaction *ka, unsigned long sp,
+                                size_t frame_size)
+{
+       /* Meta stacks grow upwards */
+       if ((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0))
+               sp = current->sas_ss_sp;
+
+       sp = (sp + 7) & ~7;                     /* 8-byte align stack */
+
+       return (void __user *)sp;
+}
+
+static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
+                         sigset_t *set, struct pt_regs *regs)
+{
+       struct rt_sigframe __user *frame;
+       int err = -EFAULT;
+       unsigned long code;
+
+       frame = get_sigframe(ka, regs->REG_SP, sizeof(*frame));
+       if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+               goto out;
+
+       err = copy_siginfo_to_user(&frame->info, info);
+
+       /* Create the ucontext.  */
+       err |= __put_user(0, &frame->uc.uc_flags);
+       err |= __put_user(0, (unsigned long __user *)&frame->uc.uc_link);
+       err |= __save_altstack(&frame->uc.uc_stack, regs->REG_SP);
+       err |= setup_sigcontext(&frame->uc.uc_mcontext,
+                               regs, set->sig[0]);
+       err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+       if (err)
+               goto out;
+
+       /* Set up to return from userspace.  */
+
+       /* MOV D1Re0 (D1.0), #__NR_rt_sigreturn */
+       code = 0x03000004 | (__NR_rt_sigreturn << 3);
+       err |= __put_user(code, (unsigned long __user *)(&frame->retcode[0]));
+
+       /* SWITCH #__METAG_SW_SYS */
+       code = __METAG_SW_ENCODING(SYS);
+       err |= __put_user(code, (unsigned long __user *)(&frame->retcode[1]));
+
+       if (err)
+               goto out;
+
+       /* Set up registers for signal handler */
+       regs->REG_RTP = (unsigned long) frame->retcode;
+       regs->REG_SP = (unsigned long) frame + sizeof(*frame);
+       regs->REG_ARG1 = sig;
+       regs->REG_ARG2 = (unsigned long) &frame->info;
+       regs->REG_ARG3 = (unsigned long) &frame->uc;
+       regs->REG_PC = (unsigned long) ka->sa.sa_handler;
+
+       pr_debug("SIG deliver (%s:%d): sp=%p pc=%08x pr=%08x\n",
+                current->comm, current->pid, frame, regs->REG_PC,
+                regs->REG_RTP);
+
+       /* Now pass size of 'new code' into sigtramp so we can do a more
+        * effective cache flush - directed rather than 'full flush'.
+        */
+       flush_cache_sigtramp(regs->REG_RTP, sizeof(frame->retcode));
+out:
+       if (err) {
+               force_sigsegv(sig, current);
+               return -EFAULT;
+       }
+       return 0;
+}
+
+static void handle_signal(unsigned long sig, siginfo_t *info,
+                         struct k_sigaction *ka, struct pt_regs *regs)
+{
+       sigset_t *oldset = sigmask_to_save();
+
+       /* Set up the stack frame */
+       if (setup_rt_frame(sig, ka, info, oldset, regs))
+               return;
+
+       signal_delivered(sig, info, ka, regs, test_thread_flag(TIF_SINGLESTEP));
+}
+
+ /*
+  * Notes for Meta.
+  * We have moved from the old 2.4.9 SH way of using syscall_nr (in the stored
+  * context) to passing in the syscall flag on the stack.
+  * This is because having syscall_nr in our context does not fit with TBX, and
+  * corrupted the stack.
+  */
+static int do_signal(struct pt_regs *regs, int syscall)
+{
+       unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
+       struct k_sigaction ka;
+       siginfo_t info;
+       int signr;
+       int restart = 0;
+
+       /*
+        * By the end of rt_sigreturn the context describes the point that the
+        * signal was taken (which may happen to be just before a syscall if
+        * it's already been restarted). This should *never* be mistaken for a
+        * system call in need of restarting.
+        */
+       if (syscall == __NR_rt_sigreturn)
+               syscall = -1;
+
+       /* Did we come from a system call? */
+       if (syscall >= 0) {
+               continue_addr = regs->REG_PC;
+               restart_addr = continue_addr - 4;
+               retval = regs->REG_RETVAL;
+
+               /*
+                * Prepare for system call restart. We do this here so that a
+                * debugger will see the already changed PC.
+                */
+               switch (retval) {
+               case -ERESTART_RESTARTBLOCK:
+                       restart = -2;
+               case -ERESTARTNOHAND:
+               case -ERESTARTSYS:
+               case -ERESTARTNOINTR:
+                       ++restart;
+                       regs->REG_PC = restart_addr;
+                       break;
+               }
+       }
+
+       /*
+        * Get the signal to deliver. When running under ptrace, at this point
+        * the debugger may change all our registers ...
+        */
+       signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+       /*
+        * Depending on the signal settings we may need to revert the decision
+        * to restart the system call. But skip this if a debugger has chosen to
+        * restart at a different PC.
+        */
+       if (regs->REG_PC != restart_addr)
+               restart = 0;
+       if (signr > 0) {
+               if (unlikely(restart)) {
+                       if (retval == -ERESTARTNOHAND
+                           || retval == -ERESTART_RESTARTBLOCK
+                           || (retval == -ERESTARTSYS
+                               && !(ka.sa.sa_flags & SA_RESTART))) {
+                               regs->REG_RETVAL = -EINTR;
+                               regs->REG_PC = continue_addr;
+                       }
+               }
+
+               /* Whee! Actually deliver the signal.  */
+               handle_signal(signr, &info, &ka, regs);
+               return 0;
+       }
+
+       /* Handlerless -ERESTART_RESTARTBLOCK re-enters via restart_syscall */
+       if (unlikely(restart < 0))
+               regs->REG_SYSCALL = __NR_restart_syscall;
+
+       /*
+        * If there's no signal to deliver, we just put the saved sigmask back.
+        */
+       restore_saved_sigmask();
+
+       return restart;
+}
+
+int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+                   int syscall)
+{
+       do {
+               if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+                       schedule();
+               } else {
+                       if (unlikely(!user_mode(regs)))
+                               return 0;
+                       local_irq_enable();
+                       if (thread_flags & _TIF_SIGPENDING) {
+                               int restart = do_signal(regs, syscall);
+                               if (unlikely(restart)) {
+                                       /*
+                                        * Restart without handlers.
+                                        * Deal with it without leaving
+                                        * the kernel space.
+                                        */
+                                       return restart;
+                               }
+                               syscall = -1;
+                       } else {
+                               clear_thread_flag(TIF_NOTIFY_RESUME);
+                               tracehook_notify_resume(regs);
+                       }
+               }
+               local_irq_disable();
+               thread_flags = current_thread_info()->flags;
+       } while (thread_flags & _TIF_WORK_MASK);
+       return 0;
+}
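
The restart bookkeeping in do_signal() above is compact: -ERESTART_RESTARTBLOCK first sets restart to -2 and then falls through to the ++restart shared with the other restart codes, so it ends up as -1 (handlerless restart via __NR_restart_syscall), while -ERESTARTNOHAND, -ERESTARTSYS and -ERESTARTNOINTR end up as 1. A minimal host-side sketch of that classification, not part of the patch and using the kernel-internal errno values only for illustration:

#include <stdio.h>

/* Kernel-internal restart codes; values assumed from include/linux/errno.h. */
#define ERESTARTSYS            512
#define ERESTARTNOINTR         513
#define ERESTARTNOHAND         514
#define ERESTART_RESTARTBLOCK  516

/* Mirrors the switch in do_signal(): returns -1, 0 or 1. */
static int classify_retval(long retval)
{
    int restart = 0;

    switch (retval) {
    case -ERESTART_RESTARTBLOCK:
        restart = -2;
        /* fall through */
    case -ERESTARTNOHAND:
    case -ERESTARTSYS:
    case -ERESTARTNOINTR:
        ++restart;
        break;
    }
    return restart;
}

int main(void)
{
    printf("%d %d %d\n",
           classify_retval(-ERESTART_RESTARTBLOCK),   /* -1 */
           classify_retval(-ERESTARTSYS),             /*  1 */
           classify_retval(-22));                     /*  0 (-EINVAL) */
    return 0;
}
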
diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c
new file mode 100644 (file)
index 0000000..4b6d1f1
--- /dev/null
@@ -0,0 +1,575 @@
+/*
+ *  Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
+ *
+ *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/cache.h>
+#include <linux/profile.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/err.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/seq_file.h>
+#include <linux/irq.h>
+#include <linux/bootmem.h>
+
+#include <asm/cacheflush.h>
+#include <asm/cachepart.h>
+#include <asm/core_reg.h>
+#include <asm/cpu.h>
+#include <asm/mmu_context.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/tlbflush.h>
+#include <asm/hwthread.h>
+#include <asm/traps.h>
+
+DECLARE_PER_CPU(PTBI, pTBI);
+
+void *secondary_data_stack;
+
+/*
+ * structures for inter-processor calls
+ * - A collection of single bit ipi messages.
+ */
+struct ipi_data {
+       spinlock_t lock;
+       unsigned long ipi_count;
+       unsigned long bits;
+};
+
+static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
+       .lock   = __SPIN_LOCK_UNLOCKED(ipi_data.lock),
+};
+
+static DEFINE_SPINLOCK(boot_lock);
+
+/*
+ * "thread" is assumed to be a valid Meta hardware thread ID.
+ */
+int __cpuinit boot_secondary(unsigned int thread, struct task_struct *idle)
+{
+       u32 val;
+
+       /*
+        * set synchronisation state between this boot processor
+        * and the secondary one
+        */
+       spin_lock(&boot_lock);
+
+       core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
+       core_reg_write(TXUPC_ID, 1, thread, 0);
+
+       /*
+        * Give the thread privilege (PSTAT) and clear potentially problematic
+        * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP).
+        */
+       core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);
+
+       /* Clear the minim enable bit. */
+       val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
+       core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80);
+
+       /*
+        * set the ThreadEnable bit (0x1) in the TXENABLE register
+        * for the specified thread - off it goes!
+        */
+       val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
+       core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1);
+
+       /*
+        * now the secondary core is starting up, let it run its
+        * calibrations, then wait for it to finish
+        */
+       spin_unlock(&boot_lock);
+
+       return 0;
+}
+
+int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+       unsigned int thread = cpu_2_hwthread_id[cpu];
+       int ret;
+
+       load_pgd(swapper_pg_dir, thread);
+
+       flush_tlb_all();
+
+       /*
+        * Tell the secondary CPU where to find its idle thread's stack.
+        */
+       secondary_data_stack = task_stack_page(idle);
+
+       wmb();
+
+       /*
+        * Now bring the CPU into our world.
+        */
+       ret = boot_secondary(thread, idle);
+       if (ret == 0) {
+               unsigned long timeout;
+
+               /*
+                * CPU was successfully started, wait for it
+                * to come online or time out.
+                */
+               timeout = jiffies + HZ;
+               while (time_before(jiffies, timeout)) {
+                       if (cpu_online(cpu))
+                               break;
+
+                       udelay(10);
+                       barrier();
+               }
+
+               if (!cpu_online(cpu))
+                       ret = -EIO;
+       }
+
+       secondary_data_stack = NULL;
+
+       if (ret) {
+               pr_crit("CPU%u: processor failed to boot\n", cpu);
+
+               /*
+                * FIXME: We need to clean up the new idle thread. --rmk
+                */
+       }
+
+       return ret;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static DECLARE_COMPLETION(cpu_killed);
+
+/*
+ * __cpu_disable runs on the processor to be shutdown.
+ */
+int __cpuexit __cpu_disable(void)
+{
+       unsigned int cpu = smp_processor_id();
+       struct task_struct *p;
+
+       /*
+        * Take this CPU offline.  Once we clear this, we can't return,
+        * and we must not schedule until we're ready to give up the cpu.
+        */
+       set_cpu_online(cpu, false);
+
+       /*
+        * OK - migrate IRQs away from this CPU
+        */
+       migrate_irqs();
+
+       /*
+        * Flush user cache and TLB mappings, and then remove this CPU
+        * from the vm mask set of all processes.
+        */
+       flush_cache_all();
+       local_flush_tlb_all();
+
+       read_lock(&tasklist_lock);
+       for_each_process(p) {
+               if (p->mm)
+                       cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
+       }
+       read_unlock(&tasklist_lock);
+
+       return 0;
+}
+
+/*
+ * called on the thread which is asking for a CPU to be shutdown -
+ * waits until shutdown has completed, or it is timed out.
+ */
+void __cpuexit __cpu_die(unsigned int cpu)
+{
+       if (!wait_for_completion_timeout(&cpu_killed, msecs_to_jiffies(1)))
+               pr_err("CPU%u: unable to kill\n", cpu);
+}
+
+/*
+ * Called from the idle thread for the CPU which has been shutdown.
+ *
+ * Note that we do not return from this function. If this cpu is
+ * brought online again it will need to run secondary_startup().
+ */
+void __cpuexit cpu_die(void)
+{
+       local_irq_disable();
+       idle_task_exit();
+
+       complete(&cpu_killed);
+
+       asm ("XOR       TXENABLE, D0Re0,D0Re0\n");
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+/*
+ * Called by both boot and secondaries to move global data into
+ * per-processor storage.
+ */
+void __cpuinit smp_store_cpu_info(unsigned int cpuid)
+{
+       struct cpuinfo_metag *cpu_info = &per_cpu(cpu_data, cpuid);
+
+       cpu_info->loops_per_jiffy = loops_per_jiffy;
+}
+
+/*
+ * This is the secondary CPU boot entry.  We're using this CPU's
+ * idle thread stack and the global page tables.
+ */
+asmlinkage void secondary_start_kernel(void)
+{
+       struct mm_struct *mm = &init_mm;
+       unsigned int cpu = smp_processor_id();
+
+       /*
+        * All kernel threads share the same mm context; grab a
+        * reference and switch to it.
+        */
+       atomic_inc(&mm->mm_users);
+       atomic_inc(&mm->mm_count);
+       current->active_mm = mm;
+       cpumask_set_cpu(cpu, mm_cpumask(mm));
+       enter_lazy_tlb(mm, current);
+       local_flush_tlb_all();
+
+       /*
+        * TODO: Some day it might be useful for each Linux CPU to
+        * have its own TBI structure. That would allow each Linux CPU
+        * to run different interrupt handlers for the same IRQ
+        * number.
+        *
+        * For now, simply copying the pointer to the boot CPU's TBI
+        * structure is sufficient because we always want to run the
+        * same interrupt handler whatever CPU takes the interrupt.
+        */
+       per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
+
+       if (!per_cpu(pTBI, cpu))
+               panic("No TBI found!");
+
+       per_cpu_trap_init(cpu);
+
+       preempt_disable();
+
+       setup_priv();
+
+       /*
+        * Enable local interrupts.
+        */
+       tbi_startup_interrupt(TBID_SIGNUM_TRT);
+       notify_cpu_starting(cpu);
+       local_irq_enable();
+
+       pr_info("CPU%u (thread %u): Booted secondary processor\n",
+               cpu, cpu_2_hwthread_id[cpu]);
+
+       calibrate_delay();
+       smp_store_cpu_info(cpu);
+
+       /*
+        * OK, now it's safe to let the boot CPU continue
+        */
+       set_cpu_online(cpu, true);
+
+       /*
+        * Check for cache aliasing.
+        * Preemption is disabled
+        */
+       check_for_cache_aliasing(cpu);
+
+       /*
+        * OK, it's off to the idle thread for us
+        */
+       cpu_idle();
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+       int cpu;
+       unsigned long bogosum = 0;
+
+       for_each_online_cpu(cpu)
+               bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy;
+
+       pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+               num_online_cpus(),
+               bogosum / (500000/HZ),
+               (bogosum / (5000/HZ)) % 100);
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+       unsigned int cpu = smp_processor_id();
+
+       init_new_context(current, &init_mm);
+       current_thread_info()->cpu = cpu;
+
+       smp_store_cpu_info(cpu);
+       init_cpu_present(cpu_possible_mask);
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+       unsigned int cpu = smp_processor_id();
+
+       per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);
+
+       if (!per_cpu(pTBI, cpu))
+               panic("No TBI found!");
+}
+
+static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg);
+
+static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
+{
+       unsigned long flags;
+       unsigned int cpu;
+       cpumask_t map;
+
+       cpumask_clear(&map);
+       local_irq_save(flags);
+
+       for_each_cpu(cpu, mask) {
+               struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
+
+               spin_lock(&ipi->lock);
+
+               /*
+                * KICK interrupts are queued in hardware so we'll get
+                * multiple interrupts if we call smp_cross_call()
+                * multiple times for one msg. The problem is that we
+                * only have one bit for each message - we can't queue
+                * them in software.
+                *
+                * The first time through ipi_handler() we'll clear
+                * the msg bit, having done all the work. But when we
+                * return we'll get _another_ interrupt (and another,
+                * and another until we've handled all the queued
+                * KICKs). Running ipi_handler() when there's no work
+                * to do is bad because that's how kick handler
+                * chaining detects who the KICK was intended for.
+                * See arch/metag/kernel/kick.c for more details.
+                *
+                * So only add 'cpu' to 'map' if we haven't already
+                * queued a KICK interrupt for 'msg'.
+                */
+               if (!(ipi->bits & (1 << msg))) {
+                       ipi->bits |= 1 << msg;
+                       cpumask_set_cpu(cpu, &map);
+               }
+
+               spin_unlock(&ipi->lock);
+       }
+
+       /*
+        * Call the platform specific cross-CPU call function.
+        */
+       smp_cross_call(map, msg);
+
+       local_irq_restore(flags);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+       send_ipi_message(mask, IPI_CALL_FUNC);
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+       send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+}
+
+void show_ipi_list(struct seq_file *p)
+{
+       unsigned int cpu;
+
+       seq_puts(p, "IPI:");
+
+       for_each_present_cpu(cpu)
+               seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);
+
+       seq_putc(p, '\n');
+}
+
+static DEFINE_SPINLOCK(stop_lock);
+
+/*
+ * Main handler for inter-processor interrupts
+ *
+ * For Meta, the ipimask now only identifies a single
+ * category of IPI (Bit 1 IPIs have been replaced by a
+ * different mechanism):
+ *
+ *  Bit 0 - Inter-processor function call
+ */
+static int do_IPI(struct pt_regs *regs)
+{
+       unsigned int cpu = smp_processor_id();
+       struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
+       struct pt_regs *old_regs = set_irq_regs(regs);
+       unsigned long msgs, nextmsg;
+       int handled = 0;
+
+       ipi->ipi_count++;
+
+       spin_lock(&ipi->lock);
+       msgs = ipi->bits;
+       nextmsg = msgs & -msgs;
+       ipi->bits &= ~nextmsg;
+       spin_unlock(&ipi->lock);
+
+       if (nextmsg) {
+               handled = 1;
+
+               nextmsg = ffz(~nextmsg);
+               switch (nextmsg) {
+               case IPI_RESCHEDULE:
+                       scheduler_ipi();
+                       break;
+
+               case IPI_CALL_FUNC:
+                       generic_smp_call_function_interrupt();
+                       break;
+
+               case IPI_CALL_FUNC_SINGLE:
+                       generic_smp_call_function_single_interrupt();
+                       break;
+
+               default:
+                       pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
+                               cpu, nextmsg);
+                       break;
+               }
+       }
+
+       set_irq_regs(old_regs);
+
+       return handled;
+}
+
+void smp_send_reschedule(int cpu)
+{
+       send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
+}
+
+static void stop_this_cpu(void *data)
+{
+       unsigned int cpu = smp_processor_id();
+
+       if (system_state == SYSTEM_BOOTING ||
+           system_state == SYSTEM_RUNNING) {
+               spin_lock(&stop_lock);
+               pr_crit("CPU%u: stopping\n", cpu);
+               dump_stack();
+               spin_unlock(&stop_lock);
+       }
+
+       set_cpu_online(cpu, false);
+
+       local_irq_disable();
+
+       hard_processor_halt(HALT_OK);
+}
+
+void smp_send_stop(void)
+{
+       smp_call_function(stop_this_cpu, NULL, 0);
+}
+
+/*
+ * not supported here
+ */
+int setup_profiling_timer(unsigned int multiplier)
+{
+       return -EINVAL;
+}
+
+/*
+ * We use KICKs for inter-processor interrupts.
+ *
+ * For every CPU in "callmap" the IPI data must already have been
+ * stored in that CPU's "ipi_data" member prior to calling this
+ * function.
+ */
+static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
+{
+       int cpu;
+
+       for_each_cpu(cpu, &callmap) {
+               unsigned int thread;
+
+               thread = cpu_2_hwthread_id[cpu];
+
+               BUG_ON(thread == BAD_HWTHREAD_ID);
+
+               metag_out32(1, T0KICKI + (thread * TnXKICK_STRIDE));
+       }
+}
+
+static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
+                  int Inst, PTBI pTBI, int *handled)
+{
+       *handled = do_IPI((struct pt_regs *)State.Sig.pCtx);
+
+       return State;
+}
+
+static struct kick_irq_handler ipi_irq = {
+       .func = ipi_handler,
+};
+
+static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg)
+{
+       kick_raise_softirq(callmap, 1);
+}
+
+static inline unsigned int get_core_count(void)
+{
+       int i;
+       unsigned int ret = 0;
+
+       for (i = 0; i < CONFIG_NR_CPUS; i++) {
+               if (core_reg_read(TXUCT_ID, TXENABLE_REGNUM, i))
+                       ret++;
+       }
+
+       return ret;
+}
+
+/*
+ * Initialise the CPU possible map early - this describes the CPUs
+ * which may be present or become present in the system.
+ */
+void __init smp_init_cpus(void)
+{
+       unsigned int i, ncores = get_core_count();
+
+       /* If no hwthread_map early param was set use default mapping */
+       for (i = 0; i < NR_CPUS; i++)
+               if (cpu_2_hwthread_id[i] == BAD_HWTHREAD_ID) {
+                       cpu_2_hwthread_id[i] = i;
+                       hwthread_id_2_cpu[i] = i;
+               }
+
+       for (i = 0; i < ncores; i++)
+               set_cpu_possible(i, true);
+
+       kick_register_func(&ipi_irq);
+}
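
send_ipi_message() and do_IPI() above rely on one pending bit per message type: a message is only re-kicked if its bit is still clear, and the handler peels off the lowest pending bit with "msgs & -msgs". A small user-space sketch of that bit discipline (the message numbers here are illustrative, not the arch's enum values):

#include <stdio.h>

enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE };

static unsigned long bits;          /* stands in for per-CPU ipi_data.bits */

static void queue_ipi(enum ipi_msg_type msg)
{
    bits |= 1UL << msg;             /* idempotent: a pending msg is not re-kicked */
}

static int take_next_ipi(void)
{
    unsigned long next = bits & -bits;      /* isolate lowest pending bit */

    if (!next)
        return -1;
    bits &= ~next;
    return __builtin_ctzl(next);            /* bit index, like ffz(~next) */
}

int main(void)
{
    int msg;

    queue_ipi(IPI_CALL_FUNC);
    queue_ipi(IPI_RESCHEDULE);
    queue_ipi(IPI_CALL_FUNC);               /* still pending: no extra kick */

    while ((msg = take_next_ipi()) >= 0)
        printf("handling IPI %d\n", msg);   /* prints 0, then 1 */
    return 0;
}
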
diff --git a/arch/metag/kernel/stacktrace.c b/arch/metag/kernel/stacktrace.c
new file mode 100644 (file)
index 0000000..5510361
--- /dev/null
@@ -0,0 +1,187 @@
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/stacktrace.h>
+
+#include <asm/stacktrace.h>
+
+#if defined(CONFIG_FRAME_POINTER)
+
+#ifdef CONFIG_KALLSYMS
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+
+static unsigned long tbi_boing_addr;
+static unsigned long tbi_boing_size;
+
+static void tbi_boing_init(void)
+{
+       /* We need to know where TBIBoingVec is and its size */
+       unsigned long size;
+       unsigned long offset;
+       char modname[MODULE_NAME_LEN];
+       char name[KSYM_NAME_LEN];
+       tbi_boing_addr = kallsyms_lookup_name("___TBIBoingVec");
+       if (!tbi_boing_addr)
+               tbi_boing_addr = 1;
+       else if (!lookup_symbol_attrs(tbi_boing_addr, &size,
+                                     &offset, modname, name))
+               tbi_boing_size = size;
+}
+#endif
+
+#define ALIGN_DOWN(addr, size)  ((addr)&(~((size)-1)))
+
+/*
+ * Unwind the current stack frame and store the new register values in the
+ * structure passed as argument. Unwinding is equivalent to a function return,
+ * hence the new PC value rather than LR should be used for backtrace.
+ */
+int notrace unwind_frame(struct stackframe *frame)
+{
+       struct metag_frame *fp = (struct metag_frame *)frame->fp;
+       unsigned long lr;
+       unsigned long fpnew;
+
+       if (frame->fp & 0x7)
+               return -EINVAL;
+
+       fpnew = fp->fp;
+       lr = fp->lr - 4;
+
+#ifdef CONFIG_KALLSYMS
+       /* If we've reached TBIBoingVec then we're at an interrupt
+        * entry point or a syscall entry point. The frame pointer
+        * points to a pt_regs which can be used to continue tracing on
+        * the other side of the boing.
+        */
+       if (!tbi_boing_addr)
+               tbi_boing_init();
+       if (tbi_boing_size && lr >= tbi_boing_addr &&
+           lr < tbi_boing_addr + tbi_boing_size) {
+               struct pt_regs *regs = (struct pt_regs *)fpnew;
+               if (user_mode(regs))
+                       return -EINVAL;
+               fpnew = regs->ctx.AX[1].U0;
+               lr = regs->ctx.DX[4].U1;
+       }
+#endif
+
+       /* stack grows up, so frame pointers must decrease */
+       if (fpnew < (ALIGN_DOWN((unsigned long)fp, THREAD_SIZE) +
+                    sizeof(struct thread_info)) || fpnew >= (unsigned long)fp)
+               return -EINVAL;
+
+       /* restore the registers from the stack frame */
+       frame->fp = fpnew;
+       frame->pc = lr;
+
+       return 0;
+}
+#else
+int notrace unwind_frame(struct stackframe *frame)
+{
+       struct metag_frame *sp = (struct metag_frame *)frame->sp;
+
+       if (frame->sp & 0x7)
+               return -EINVAL;
+
+       while (!kstack_end(sp)) {
+               unsigned long addr = sp->lr - 4;
+               sp--;
+
+               if (__kernel_text_address(addr)) {
+                       frame->sp = (unsigned long)sp;
+                       frame->pc = addr;
+                       return 0;
+               }
+       }
+       return -EINVAL;
+}
+#endif
+
+void notrace walk_stackframe(struct stackframe *frame,
+                    int (*fn)(struct stackframe *, void *), void *data)
+{
+       while (1) {
+               int ret;
+
+               if (fn(frame, data))
+                       break;
+               ret = unwind_frame(frame);
+               if (ret < 0)
+                       break;
+       }
+}
+EXPORT_SYMBOL(walk_stackframe);
+
+#ifdef CONFIG_STACKTRACE
+struct stack_trace_data {
+       struct stack_trace *trace;
+       unsigned int no_sched_functions;
+       unsigned int skip;
+};
+
+static int save_trace(struct stackframe *frame, void *d)
+{
+       struct stack_trace_data *data = d;
+       struct stack_trace *trace = data->trace;
+       unsigned long addr = frame->pc;
+
+       if (data->no_sched_functions && in_sched_functions(addr))
+               return 0;
+       if (data->skip) {
+               data->skip--;
+               return 0;
+       }
+
+       trace->entries[trace->nr_entries++] = addr;
+
+       return trace->nr_entries >= trace->max_entries;
+}
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+       struct stack_trace_data data;
+       struct stackframe frame;
+
+       data.trace = trace;
+       data.skip = trace->skip;
+
+       if (tsk != current) {
+#ifdef CONFIG_SMP
+               /*
+                * What guarantees do we have here that 'tsk' is not
+                * running on another CPU?  For now, ignore it as we
+                * can't guarantee we won't explode.
+                */
+               if (trace->nr_entries < trace->max_entries)
+                       trace->entries[trace->nr_entries++] = ULONG_MAX;
+               return;
+#else
+               data.no_sched_functions = 1;
+               frame.fp = thread_saved_fp(tsk);
+               frame.sp = thread_saved_sp(tsk);
+               frame.lr = 0;           /* recovered from the stack */
+               frame.pc = thread_saved_pc(tsk);
+#endif
+       } else {
+               register unsigned long current_sp asm ("A0StP");
+
+               data.no_sched_functions = 0;
+               frame.fp = (unsigned long)__builtin_frame_address(0);
+               frame.sp = current_sp;
+               frame.lr = (unsigned long)__builtin_return_address(0);
+               frame.pc = (unsigned long)save_stack_trace_tsk;
+       }
+
+       walk_stackframe(&frame, save_trace, &data);
+       if (trace->nr_entries < trace->max_entries)
+               trace->entries[trace->nr_entries++] = ULONG_MAX;
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+       save_stack_trace_tsk(current, trace);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+#endif
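
The frame-pointer unwinder above accepts a new frame pointer only if it lies between the thread_info at the bottom of the THREAD_SIZE-aligned stack area and the current frame pointer, because the Meta stack grows upwards and older frames therefore sit at lower addresses. A host-side sketch of that check, with placeholder values for THREAD_SIZE and the thread_info size:

#include <stdio.h>

#define THREAD_SIZE             8192UL  /* placeholder value */
#define THREAD_INFO_SIZE        128UL   /* placeholder value */
#define ALIGN_DOWN(addr, size)  ((addr) & ~((size) - 1))

/* Same shape as the fpnew sanity check in unwind_frame(). */
static int fp_is_plausible(unsigned long fp, unsigned long fpnew)
{
    unsigned long base = ALIGN_DOWN(fp, THREAD_SIZE);

    return fpnew >= base + THREAD_INFO_SIZE && fpnew < fp;
}

int main(void)
{
    unsigned long fp = 0x4000bf80UL;

    printf("%d\n", fp_is_plausible(fp, 0x4000bf00UL));  /* 1: older frame, lower */
    printf("%d\n", fp_is_plausible(fp, 0x4000c100UL));  /* 0: would grow the wrong way */
    return 0;
}
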
diff --git a/arch/metag/kernel/sys_metag.c b/arch/metag/kernel/sys_metag.c
new file mode 100644 (file)
index 0000000..efe833a
--- /dev/null
@@ -0,0 +1,180 @@
+/*
+ * This file contains various random system calls that
+ * have a non-standard calling sequence on the Linux/Meta
+ * platform.
+ */
+
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/unistd.h>
+#include <asm/cacheflush.h>
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/switch.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
+#include <asm/user_gateway.h>
+
+#define merge_64(hi, lo) ((((unsigned long long)(hi)) << 32) + \
+                         ((lo) & 0xffffffffUL))
+
+int metag_mmap_check(unsigned long addr, unsigned long len,
+                    unsigned long flags)
+{
+       /* We can't have people trying to write to the bottom of the
+        * memory map; there are mysterious unspecified things there that
+        * we don't want people trampling on.
+        */
+       if ((flags & MAP_FIXED) && (addr < TASK_UNMAPPED_BASE))
+               return -EINVAL;
+
+       return 0;
+}
+
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+                         unsigned long prot, unsigned long flags,
+                         unsigned long fd, unsigned long pgoff)
+{
+       /* The shift for mmap2 is constant, regardless of PAGE_SIZE setting. */
+       if (pgoff & ((1 << (PAGE_SHIFT - 12)) - 1))
+               return -EINVAL;
+
+       pgoff >>= PAGE_SHIFT - 12;
+
+       return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
+}
+
+asmlinkage int sys_metag_setglobalbit(char __user *addr, int mask)
+{
+       char tmp;
+       int ret = 0;
+       unsigned int flags;
+
+       if (!((__force unsigned int)addr >= LINCORE_BASE))
+               return -EFAULT;
+
+       __global_lock2(flags);
+
+       metag_data_cache_flush((__force void *)addr, sizeof(mask));
+
+       ret = __get_user(tmp, addr);
+       if (ret)
+               goto out;
+       tmp |= mask;
+       ret = __put_user(tmp, addr);
+
+       metag_data_cache_flush((__force void *)addr, sizeof(mask));
+
+out:
+       __global_unlock2(flags);
+
+       return ret;
+}
+
+#define TXDEFR_FPU_MASK ((0x1f << 16) | 0x1f)
+
+asmlinkage void sys_metag_set_fpu_flags(unsigned int flags)
+{
+       unsigned int temp;
+
+       flags &= TXDEFR_FPU_MASK;
+
+       temp = __core_reg_get(TXDEFR);
+       temp &= ~TXDEFR_FPU_MASK;
+       temp |= flags;
+       __core_reg_set(TXDEFR, temp);
+}
+
+asmlinkage int sys_metag_set_tls(void __user *ptr)
+{
+       current->thread.tls_ptr = ptr;
+       set_gateway_tls(ptr);
+
+       return 0;
+}
+
+asmlinkage void *sys_metag_get_tls(void)
+{
+       return (__force void *)current->thread.tls_ptr;
+}
+
+asmlinkage long sys_truncate64_metag(const char __user *path, unsigned long lo,
+                                    unsigned long hi)
+{
+       return sys_truncate64(path, merge_64(hi, lo));
+}
+
+asmlinkage long sys_ftruncate64_metag(unsigned int fd, unsigned long lo,
+                                     unsigned long hi)
+{
+       return sys_ftruncate64(fd, merge_64(hi, lo));
+}
+
+asmlinkage long sys_fadvise64_64_metag(int fd, unsigned long offs_lo,
+                                      unsigned long offs_hi,
+                                      unsigned long len_lo,
+                                      unsigned long len_hi, int advice)
+{
+       return sys_fadvise64_64(fd, merge_64(offs_hi, offs_lo),
+                               merge_64(len_hi, len_lo), advice);
+}
+
+asmlinkage long sys_readahead_metag(int fd, unsigned long lo, unsigned long hi,
+                                   size_t count)
+{
+       return sys_readahead(fd, merge_64(hi, lo), count);
+}
+
+asmlinkage ssize_t sys_pread64_metag(unsigned long fd, char __user *buf,
+                                    size_t count, unsigned long lo,
+                                    unsigned long hi)
+{
+       return sys_pread64(fd, buf, count, merge_64(hi, lo));
+}
+
+asmlinkage ssize_t sys_pwrite64_metag(unsigned long fd, char __user *buf,
+                                     size_t count, unsigned long lo,
+                                     unsigned long hi)
+{
+       return sys_pwrite64(fd, buf, count, merge_64(hi, lo));
+}
+
+asmlinkage long sys_sync_file_range_metag(int fd, unsigned long offs_lo,
+                                         unsigned long offs_hi,
+                                         unsigned long len_lo,
+                                         unsigned long len_hi,
+                                         unsigned int flags)
+{
+       return sys_sync_file_range(fd, merge_64(offs_hi, offs_lo),
+                                  merge_64(len_hi, len_lo), flags);
+}
+
+/* Provide the actual syscall number to call mapping. */
+#undef __SYSCALL
+#define __SYSCALL(nr, call) [nr] = (call),
+
+/*
+ * We need wrappers for anything with unaligned 64-bit arguments
+ */
+#define sys_truncate64         sys_truncate64_metag
+#define sys_ftruncate64                sys_ftruncate64_metag
+#define sys_fadvise64_64       sys_fadvise64_64_metag
+#define sys_readahead          sys_readahead_metag
+#define sys_pread64            sys_pread64_metag
+#define sys_pwrite64           sys_pwrite64_metag
+#define sys_sync_file_range    sys_sync_file_range_metag
+
+/*
+ * Note that we can't include <linux/unistd.h> here since the header
+ * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well.
+ */
+const void *sys_call_table[__NR_syscalls] = {
+       [0 ... __NR_syscalls-1] = sys_ni_syscall,
+#include <asm/unistd.h>
+};
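
The *_metag wrappers above exist because a 64-bit syscall argument arrives from userspace as two 32-bit halves; merge_64() glues them back together, and sys_mmap2() rescales its offset, which is always in 4 KiB units, whenever the kernel page size is larger than 4 KiB. A host-side sketch of both pieces of arithmetic (the 16 KiB page size is just an example):

#include <stdio.h>

#define merge_64(hi, lo) ((((unsigned long long)(hi)) << 32) + \
                          ((lo) & 0xffffffffULL))

int main(void)
{
    /* Two 32-bit halves reassembled into one 64-bit file offset. */
    unsigned long hi = 0x1UL, lo = 0x80000000UL;
    printf("offset = 0x%llx\n", merge_64(hi, lo));      /* 0x180000000 */

    /* mmap2 offsets are in 4 KiB units regardless of PAGE_SIZE. */
    unsigned int page_shift = 14;                       /* e.g. 16 KiB pages */
    unsigned long pgoff = 0x10;                         /* 16 * 4 KiB = 64 KiB */

    if (pgoff & ((1UL << (page_shift - 12)) - 1))
        puts("EINVAL: offset not aligned for this page size");
    else
        printf("kernel pgoff = 0x%lx\n", pgoff >> (page_shift - 12));
    return 0;
}
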
diff --git a/arch/metag/kernel/tbiunexp.S b/arch/metag/kernel/tbiunexp.S
new file mode 100644 (file)
index 0000000..907bbe0
--- /dev/null
@@ -0,0 +1,22 @@
+/* Pass a breakpoint through to Codescape */
+
+#include <asm/tbx.h>
+
+       .text
+        .global        ___TBIUnExpXXX
+        .type   ___TBIUnExpXXX,function
+___TBIUnExpXXX:
+       TSTT    D0Ar2,#TBICTX_CRIT_BIT  ! Result of nestable int call?
+       BZ      $LTBINormCase           ! UnExpXXX at background level
+       MOV     D0Re0,TXMASKI           ! Read TXMASKI
+       XOR     TXMASKI,D1Re0,D1Re0     ! Turn off BGNDHALT handling!
+       OR      D0Ar2,D0Ar2,D0Re0       ! Preserve bits cleared
+$LTBINormCase:
+       MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2       ! Save args on stack
+       SETL    [A0StP++],D0Ar2,D1Ar1   ! Init area for returned values
+       SWITCH  #0xC20208               ! Total stack frame size 8 Dwords
+                                       !            write back size 2 Dwords
+       GETL    D0Re0,D1Re0,[--A0StP]   ! Get result
+       SUB     A0StP,A0StP,#(8*3)      ! Recover stack frame
+       MOV     PC,D1RtP
+        .size          ___TBIUnExpXXX,.-___TBIUnExpXXX
diff --git a/arch/metag/kernel/tcm.c b/arch/metag/kernel/tcm.c
new file mode 100644 (file)
index 0000000..5d102b3
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2010 Imagination Technologies Ltd.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/stddef.h>
+#include <linux/genalloc.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <asm/page.h>
+#include <asm/tcm.h>
+
+struct tcm_pool {
+       struct list_head list;
+       unsigned int tag;
+       unsigned long start;
+       unsigned long end;
+       struct gen_pool *pool;
+};
+
+static LIST_HEAD(pool_list);
+
+static struct tcm_pool *find_pool(unsigned int tag)
+{
+       struct list_head *lh;
+       struct tcm_pool *pool;
+
+       list_for_each(lh, &pool_list) {
+               pool = list_entry(lh, struct tcm_pool, list);
+               if (pool->tag == tag)
+                       return pool;
+       }
+
+       return NULL;
+}
+
+/**
+ * tcm_alloc - allocate memory from a TCM pool
+ * @tag: tag of the pool to allocate memory from
+ * @len: number of bytes to be allocated
+ *
+ * Allocate the requested number of bytes from the pool matching
+ * the specified tag. Returns the address of the allocated memory
+ * or zero on failure.
+ */
+unsigned long tcm_alloc(unsigned int tag, size_t len)
+{
+       unsigned long vaddr;
+       struct tcm_pool *pool;
+
+       pool = find_pool(tag);
+       if (!pool)
+               return 0;
+
+       vaddr = gen_pool_alloc(pool->pool, len);
+       if (!vaddr)
+               return 0;
+
+       return vaddr;
+}
+
+/**
+ * tcm_free - free a block of memory to a TCM pool
+ * @tag: tag of the pool to free memory to
+ * @addr: address of the memory to be freed
+ * @len: number of bytes to be freed
+ *
+ * Free the requested number of bytes at a specific address to the
+ * pool matching the specified tag.
+ */
+void tcm_free(unsigned int tag, unsigned long addr, size_t len)
+{
+       struct tcm_pool *pool;
+
+       pool = find_pool(tag);
+       if (!pool)
+               return;
+       gen_pool_free(pool->pool, addr, len);
+}
+
+/**
+ * tcm_lookup_tag - find the tag matching an address
+ * @p: memory address to lookup the tag for
+ *
+ * Find the tag of the tcm memory region that contains the
+ * specified address. Returns %TCM_INVALID_TAG if no such
+ * memory region could be found.
+ */
+unsigned int tcm_lookup_tag(unsigned long p)
+{
+       struct list_head *lh;
+       struct tcm_pool *pool;
+       unsigned long addr = (unsigned long) p;
+
+       list_for_each(lh, &pool_list) {
+               pool = list_entry(lh, struct tcm_pool, list);
+               if (addr >= pool->start && addr < pool->end)
+                       return pool->tag;
+       }
+
+       return TCM_INVALID_TAG;
+}
+
+/**
+ * tcm_add_region - add a memory region to TCM pool list
+ * @reg: descriptor of region to be added
+ *
+ * Add a region of memory to the TCM pool list. Returns 0 on success.
+ */
+int __init tcm_add_region(struct tcm_region *reg)
+{
+       struct tcm_pool *pool;
+
+       pool = kmalloc(sizeof(*pool), GFP_KERNEL);
+       if (!pool) {
+               pr_err("Failed to alloc memory for TCM pool!\n");
+               return -ENOMEM;
+       }
+
+       pool->tag = reg->tag;
+       pool->start = reg->res.start;
+       pool->end = reg->res.end;
+
+       /*
+        * 2^3 = 8-byte granularity to allow for 64-bit access alignment.
+        * -1 = NUMA node specifier.
+        */
+       pool->pool = gen_pool_create(3, -1);
+
+       if (!pool->pool) {
+               pr_err("Failed to create TCM pool!\n");
+               kfree(pool);
+               return -ENOMEM;
+       }
+
+       if (gen_pool_add(pool->pool, reg->res.start,
+                        reg->res.end - reg->res.start + 1, -1)) {
+               pr_err("Failed to add memory to TCM pool!\n");
+               return -ENOMEM;
+       }
+       pr_info("Added %s TCM pool (%08x bytes @ %08x)\n",
+               reg->res.name, reg->res.end - reg->res.start + 1,
+               reg->res.start);
+
+       list_add_tail(&pool->list, &pool_list);
+
+       return 0;
+}
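
A hypothetical in-kernel user of the pool interface above would look roughly like the fragment below; tcm_example_init() and TCM_EXAMPLE_ADDR are invented for illustration, and error handling is kept minimal:

#include <linux/errno.h>
#include <linux/init.h>
#include <asm/tcm.h>

#define TCM_EXAMPLE_ADDR        0xe0000000UL    /* made-up on-chip address */

static int __init tcm_example_init(void)
{
    unsigned int tag = tcm_lookup_tag(TCM_EXAMPLE_ADDR);
    unsigned long buf;

    if (tag == TCM_INVALID_TAG)
        return -ENODEV;

    /* Allocations come back 8-byte aligned (gen_pool_create(3, -1)). */
    buf = tcm_alloc(tag, 64);
    if (!buf)
        return -ENOMEM;

    /* ... place hot data or code at 'buf' ... */

    tcm_free(tag, buf, 64);
    return 0;
}
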
diff --git a/arch/metag/kernel/time.c b/arch/metag/kernel/time.c
new file mode 100644 (file)
index 0000000..17dc107
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * Copyright (C) 2005-2013 Imagination Technologies Ltd.
+ *
+ * This file contains the Meta-specific time handling details.
+ *
+ */
+
+#include <linux/init.h>
+
+#include <clocksource/metag_generic.h>
+
+void __init time_init(void)
+{
+       metag_generic_timer_init();
+}
diff --git a/arch/metag/kernel/topology.c b/arch/metag/kernel/topology.c
new file mode 100644 (file)
index 0000000..bec3dec
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2010  Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/cpu.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/node.h>
+#include <linux/nodemask.h>
+#include <linux/topology.h>
+
+#include <asm/cpu.h>
+
+DEFINE_PER_CPU(struct cpuinfo_metag, cpu_data);
+
+cpumask_t cpu_core_map[NR_CPUS];
+
+static cpumask_t cpu_coregroup_map(unsigned int cpu)
+{
+       return *cpu_possible_mask;
+}
+
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
+{
+       return &cpu_core_map[cpu];
+}
+
+int arch_update_cpu_topology(void)
+{
+       unsigned int cpu;
+
+       for_each_possible_cpu(cpu)
+               cpu_core_map[cpu] = cpu_coregroup_map(cpu);
+
+       return 0;
+}
+
+static int __init topology_init(void)
+{
+       int i, ret;
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+       for_each_online_node(i)
+               register_one_node(i);
+#endif
+
+       for_each_present_cpu(i) {
+               struct cpuinfo_metag *cpuinfo = &per_cpu(cpu_data, i);
+#ifdef CONFIG_HOTPLUG_CPU
+               cpuinfo->cpu.hotpluggable = 1;
+#endif
+               ret = register_cpu(&cpuinfo->cpu, i);
+               if (unlikely(ret))
+                       pr_warn("%s: register_cpu %d failed (%d)\n",
+                               __func__, i, ret);
+       }
+
+#if defined(CONFIG_NUMA) && !defined(CONFIG_SMP)
+       /*
+        * In the UP case, make sure the CPU association is still
+        * registered under each node. Without this, sysfs fails
+        * to make the connection between nodes other than node0
+        * and cpu0.
+        */
+       for_each_online_node(i)
+               if (i != numa_node_id())
+                       register_cpu_under_node(raw_smp_processor_id(), i);
+#endif
+
+       return 0;
+}
+subsys_initcall(topology_init);
diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c
new file mode 100644 (file)
index 0000000..8961f24
--- /dev/null
@@ -0,0 +1,995 @@
+/*
+ *  Meta exception handling.
+ *
+ *  Copyright (C) 2005,2006,2007,2008,2009,2012 Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/preempt.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/kdebug.h>
+#include <linux/kexec.h>
+#include <linux/unistd.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/syscalls.h>
+
+#include <asm/bug.h>
+#include <asm/core_reg.h>
+#include <asm/irqflags.h>
+#include <asm/siginfo.h>
+#include <asm/traps.h>
+#include <asm/hwthread.h>
+#include <asm/switch.h>
+#include <asm/user_gateway.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
+
+/* Passing syscall arguments as long long is quicker. */
+typedef unsigned int (*LPSYSCALL) (unsigned long long,
+                                  unsigned long long,
+                                  unsigned long long);
+
+/*
+ * Users of LNKSET should compare the bus error bits obtained from DEFR
+ * against TXDEFR_LNKSET_SUCCESS only as the failure code will vary between
+ * different core revisions.
+ */
+#define TXDEFR_LNKSET_SUCCESS 0x02000000
+#define TXDEFR_LNKSET_FAILURE 0x04000000
+
+/*
+ * Our global TBI handle.  Initialised from setup.c/setup_arch.
+ */
+DECLARE_PER_CPU(PTBI, pTBI);
+
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(unsigned int, trigger_mask);
+#else
+unsigned int global_trigger_mask;
+EXPORT_SYMBOL(global_trigger_mask);
+#endif
+
+unsigned long per_cpu__stack_save[NR_CPUS];
+
+static const char * const trap_names[] = {
+       [TBIXXF_SIGNUM_IIF] = "Illegal instruction fault",
+       [TBIXXF_SIGNUM_PGF] = "Privilege violation",
+       [TBIXXF_SIGNUM_DHF] = "Unaligned data access fault",
+       [TBIXXF_SIGNUM_IGF] = "Code fetch general read failure",
+       [TBIXXF_SIGNUM_DGF] = "Data access general read/write fault",
+       [TBIXXF_SIGNUM_IPF] = "Code fetch page fault",
+       [TBIXXF_SIGNUM_DPF] = "Data access page fault",
+       [TBIXXF_SIGNUM_IHF] = "Instruction breakpoint",
+       [TBIXXF_SIGNUM_DWF] = "Read-only data access fault",
+};
+
+const char *trap_name(int trapno)
+{
+       if (trapno >= 0 && trapno < ARRAY_SIZE(trap_names)
+                       && trap_names[trapno])
+               return trap_names[trapno];
+       return "Unknown fault";
+}
+
+static DEFINE_SPINLOCK(die_lock);
+
+void die(const char *str, struct pt_regs *regs, long err,
+        unsigned long addr)
+{
+       static int die_counter;
+
+       oops_enter();
+
+       spin_lock_irq(&die_lock);
+       console_verbose();
+       bust_spinlocks(1);
+       pr_err("%s: err %04lx (%s) addr %08lx [#%d]\n", str, err & 0xffff,
+              trap_name(err & 0xffff), addr, ++die_counter);
+
+       print_modules();
+       show_regs(regs);
+
+       pr_err("Process: %s (pid: %d, stack limit = %p)\n", current->comm,
+              task_pid_nr(current), task_stack_page(current) + THREAD_SIZE);
+
+       bust_spinlocks(0);
+       add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+       if (kexec_should_crash(current))
+               crash_kexec(regs);
+
+       if (in_interrupt())
+               panic("Fatal exception in interrupt");
+
+       if (panic_on_oops)
+               panic("Fatal exception");
+
+       spin_unlock_irq(&die_lock);
+       oops_exit();
+       do_exit(SIGSEGV);
+}
+
+#ifdef CONFIG_METAG_DSP
+/*
+ * The ECH encoding specifies the size of a DSPRAM as,
+ *
+ *             "slots" / 4
+ *
+ * A "slot" is the size of two DSPRAM bank entries; an entry from
+ * DSPRAM bank A and an entry from DSPRAM bank B. One DSPRAM bank
+ * entry is 4 bytes.
+ */
+#define SLOT_SZ        8
+static inline unsigned int decode_dspram_size(unsigned int size)
+{
+       unsigned int _sz = size & 0x7f;
+
+       return _sz * SLOT_SZ * 4;
+}
+
+static void dspram_save(struct meta_ext_context *dsp_ctx,
+                       unsigned int ramA_sz, unsigned int ramB_sz)
+{
+       unsigned int ram_sz[2];
+       int i;
+
+       ram_sz[0] = ramA_sz;
+       ram_sz[1] = ramB_sz;
+
+       for (i = 0; i < 2; i++) {
+               if (ram_sz[i] != 0) {
+                       unsigned int sz;
+
+                       if (i == 0)
+                               sz = decode_dspram_size(ram_sz[i] >> 8);
+                       else
+                               sz = decode_dspram_size(ram_sz[i]);
+
+                       if (dsp_ctx->ram[i] == NULL) {
+                               dsp_ctx->ram[i] = kmalloc(sz, GFP_KERNEL);
+
+                               if (dsp_ctx->ram[i] == NULL)
+                                       panic("couldn't save DSP context");
+                       } else {
+                               if (ram_sz[i] > dsp_ctx->ram_sz[i]) {
+                                       kfree(dsp_ctx->ram[i]);
+
+                                       dsp_ctx->ram[i] = kmalloc(sz,
+                                                                 GFP_KERNEL);
+
+                                       if (dsp_ctx->ram[i] == NULL)
+                                               panic("couldn't save DSP context");
+                               }
+                       }
+
+                       if (i == 0)
+                               __TBIDspramSaveA(ram_sz[i], dsp_ctx->ram[i]);
+                       else
+                               __TBIDspramSaveB(ram_sz[i], dsp_ctx->ram[i]);
+
+                       dsp_ctx->ram_sz[i] = ram_sz[i];
+               }
+       }
+}
+#endif /* CONFIG_METAG_DSP */
+
+/*
+ * Allow interrupts to be nested and save any "extended" register
+ * context state, e.g. DSP regs and RAMs.
+ */
+static void nest_interrupts(TBIRES State, unsigned long mask)
+{
+#ifdef CONFIG_METAG_DSP
+       struct meta_ext_context *dsp_ctx;
+       unsigned int D0_8;
+
+       /*
+        * D0.8 may contain an ECH encoding. The upper 16 bits
+        * tell us what DSP resources the current process is
+        * using. OR the bits into the SaveMask so that
+        * __TBINestInts() knows what resources to save as
+        * part of this context.
+        *
+        * Don't save the context if we're nesting interrupts in the
+        * kernel because the kernel doesn't use DSP hardware.
+        */
+       D0_8 = __core_reg_get(D0.8);
+
+       if (D0_8 && (State.Sig.SaveMask & TBICTX_PRIV_BIT)) {
+               State.Sig.SaveMask |= (D0_8 >> 16);
+
+               dsp_ctx = current->thread.dsp_context;
+               if (dsp_ctx == NULL) {
+                       dsp_ctx = kzalloc(sizeof(*dsp_ctx), GFP_KERNEL);
+                       if (dsp_ctx == NULL)
+                               panic("couldn't save DSP context: ENOMEM");
+
+                       current->thread.dsp_context = dsp_ctx;
+               }
+
+               current->thread.user_flags |= (D0_8 & 0xffff0000);
+               __TBINestInts(State, &dsp_ctx->regs, mask);
+               dspram_save(dsp_ctx, D0_8 & 0x7f00, D0_8 & 0x007f);
+       } else
+               __TBINestInts(State, NULL, mask);
+#else
+       __TBINestInts(State, NULL, mask);
+#endif
+}
+
+void head_end(TBIRES State, unsigned long mask)
+{
+       unsigned int savemask = (unsigned short)State.Sig.SaveMask;
+       unsigned int ctx_savemask = (unsigned short)State.Sig.pCtx->SaveMask;
+
+       if (savemask & TBICTX_PRIV_BIT) {
+               ctx_savemask |= TBICTX_PRIV_BIT;
+               current->thread.user_flags = savemask;
+       }
+
+       /* Always undo the sleep bit */
+       ctx_savemask &= ~TBICTX_WAIT_BIT;
+
+       /* Always save the catch buffer and RD pipe if they are dirty */
+       savemask |= TBICTX_XCBF_BIT;
+
+       /* Only save the catch and RD if we have not already done so.
+        * Note - the RD bits are in the pCtx only, and not in the
+        * State.SaveMask.
+        */
+       if ((savemask & TBICTX_CBUF_BIT) ||
+           (ctx_savemask & TBICTX_CBRP_BIT)) {
+               /* Have we already saved the buffers though?
+                * - See TestTrack 5071 */
+               if (ctx_savemask & TBICTX_XCBF_BIT) {
+                       /* Strip off the bits so the call to __TBINestInts
+                        * won't save the buffers again. */
+                       savemask &= ~TBICTX_CBUF_BIT;
+                       ctx_savemask &= ~TBICTX_CBRP_BIT;
+               }
+       }
+
+#ifdef CONFIG_METAG_META21
+       {
+               unsigned int depth, txdefr;
+
+               /*
+                * Save TXDEFR state.
+                *
+                * The process may have been interrupted after a LNKSET, but
+                * before it could read the DEFR state, so we mustn't lose that
+                * state or it could end up retrying an atomic operation that
+                * succeeded.
+                *
+                * All interrupts are disabled at this point so we
+                * don't need to perform any locking. We must do this
+                * dance before we use LNKGET or LNKSET.
+                */
+               BUG_ON(current->thread.int_depth > HARDIRQ_BITS);
+
+               depth = current->thread.int_depth++;
+
+               txdefr = __core_reg_get(TXDEFR);
+
+               txdefr &= TXDEFR_BUS_STATE_BITS;
+               if (txdefr & TXDEFR_LNKSET_SUCCESS)
+                       current->thread.txdefr_failure &= ~(1 << depth);
+               else
+                       current->thread.txdefr_failure |= (1 << depth);
+       }
+#endif
+
+       State.Sig.SaveMask = savemask;
+       State.Sig.pCtx->SaveMask = ctx_savemask;
+
+       nest_interrupts(State, mask);
+
+#ifdef CONFIG_METAG_POISON_CATCH_BUFFERS
+       /* Poison the catch registers.  This shows up any mistakes we have
+        * made in their handling MUCH quicker.
+        */
+       __core_reg_set(TXCATCH0, 0x87650021);
+       __core_reg_set(TXCATCH1, 0x87654322);
+       __core_reg_set(TXCATCH2, 0x87654323);
+       __core_reg_set(TXCATCH3, 0x87654324);
+#endif /* CONFIG_METAG_POISON_CATCH_BUFFERS */
+}
+
+TBIRES tail_end_sys(TBIRES State, int syscall, int *restart)
+{
+       struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+       unsigned long flags;
+
+       local_irq_disable();
+
+       if (user_mode(regs)) {
+               flags = current_thread_info()->flags;
+               if (flags & _TIF_WORK_MASK &&
+                   do_work_pending(regs, flags, syscall)) {
+                       *restart = 1;
+                       return State;
+               }
+
+#ifdef CONFIG_METAG_FPU
+               if (current->thread.fpu_context &&
+                   current->thread.fpu_context->needs_restore) {
+                       __TBICtxFPURestore(State, current->thread.fpu_context);
+                       /*
+                        * Clearing this bit ensures the FP unit is not made
+                        * active again unless it is used.
+                        */
+                       State.Sig.SaveMask &= ~TBICTX_FPAC_BIT;
+                       current->thread.fpu_context->needs_restore = false;
+               }
+               State.Sig.TrigMask |= TBI_TRIG_BIT(TBID_SIGNUM_DFR);
+#endif
+       }
+
+       /* TBI will turn interrupts back on at some point. */
+       if (!irqs_disabled_flags((unsigned long)State.Sig.TrigMask))
+               trace_hardirqs_on();
+
+#ifdef CONFIG_METAG_DSP
+       /*
+        * If we previously saved an extended context then restore it
+        * now. Otherwise, clear D0.8 because this process is not
+        * using DSP hardware.
+        */
+       if (State.Sig.pCtx->SaveMask & TBICTX_XEXT_BIT) {
+               unsigned int D0_8;
+               struct meta_ext_context *dsp_ctx = current->thread.dsp_context;
+
+               /* Make sure we're going to return to userland. */
+               BUG_ON(current->thread.int_depth != 1);
+
+               if (dsp_ctx->ram_sz[0] > 0)
+                       __TBIDspramRestoreA(dsp_ctx->ram_sz[0],
+                                           dsp_ctx->ram[0]);
+               if (dsp_ctx->ram_sz[1] > 0)
+                       __TBIDspramRestoreB(dsp_ctx->ram_sz[1],
+                                           dsp_ctx->ram[1]);
+
+               State.Sig.SaveMask |= State.Sig.pCtx->SaveMask;
+               __TBICtxRestore(State, current->thread.dsp_context);
+               D0_8 = __core_reg_get(D0.8);
+               D0_8 |= current->thread.user_flags & 0xffff0000;
+               D0_8 |= (dsp_ctx->ram_sz[1] | dsp_ctx->ram_sz[0]) & 0xffff;
+               __core_reg_set(D0.8, D0_8);
+       } else
+               __core_reg_set(D0.8, 0);
+#endif /* CONFIG_METAG_DSP */
+
+#ifdef CONFIG_METAG_META21
+       {
+               unsigned int depth, txdefr;
+
+               /*
+                * If there hasn't been a LNKSET since the last LNKGET then the
+                * link flag will be set, causing the next LNKSET to succeed if
+                * the addresses match. The two LNK operations may not be a pair
+                * (e.g. see atomic_read()), so the LNKSET should fail.
+                * We use a conditional-never LNKSET to clear the link flag
+                * without side effects.
+                */
+               asm volatile("LNKSETDNV [D0Re0],D0Re0");
+
+               depth = --current->thread.int_depth;
+
+               BUG_ON(user_mode(regs) && depth);
+
+               txdefr = __core_reg_get(TXDEFR);
+
+               txdefr &= ~TXDEFR_BUS_STATE_BITS;
+
+               /* Do we need to restore a failure code into TXDEFR? */
+               if (current->thread.txdefr_failure & (1 << depth))
+                       txdefr |= (TXDEFR_LNKSET_FAILURE | TXDEFR_BUS_TRIG_BIT);
+               else
+                       txdefr |= (TXDEFR_LNKSET_SUCCESS | TXDEFR_BUS_TRIG_BIT);
+
+               __core_reg_set(TXDEFR, txdefr);
+       }
+#endif
+       return State;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * If we took an interrupt in the middle of __kuser_get_tls then we need
+ * to rewind the PC to the start of the function in case the process
+ * gets migrated to another thread (SMP only) and it reads the wrong tls
+ * data.
+ */
+static inline void _restart_critical_section(TBIRES State)
+{
+       unsigned long get_tls_start;
+       unsigned long get_tls_end;
+
+       get_tls_start = (unsigned long)__kuser_get_tls -
+               (unsigned long)&__user_gateway_start;
+
+       get_tls_start += USER_GATEWAY_PAGE;
+
+       get_tls_end = (unsigned long)__kuser_get_tls_end -
+               (unsigned long)&__user_gateway_start;
+
+       get_tls_end += USER_GATEWAY_PAGE;
+
+       if ((State.Sig.pCtx->CurrPC >= get_tls_start) &&
+           (State.Sig.pCtx->CurrPC < get_tls_end))
+               State.Sig.pCtx->CurrPC = get_tls_start;
+}
+#else
+/*
+ * If we took an interrupt in the middle of
+ * __kuser_cmpxchg then we need to rewind the PC to the
+ * start of the function.
+ */
+static inline void _restart_critical_section(TBIRES State)
+{
+       unsigned long cmpxchg_start;
+       unsigned long cmpxchg_end;
+
+       cmpxchg_start = (unsigned long)__kuser_cmpxchg -
+               (unsigned long)&__user_gateway_start;
+
+       cmpxchg_start += USER_GATEWAY_PAGE;
+
+       cmpxchg_end = (unsigned long)__kuser_cmpxchg_end -
+               (unsigned long)&__user_gateway_start;
+
+       cmpxchg_end += USER_GATEWAY_PAGE;
+
+       if ((State.Sig.pCtx->CurrPC >= cmpxchg_start) &&
+           (State.Sig.pCtx->CurrPC < cmpxchg_end))
+               State.Sig.pCtx->CurrPC = cmpxchg_start;
+}
+#endif
+
+/* Used by kick_handler() */
+void restart_critical_section(TBIRES State)
+{
+       _restart_critical_section(State);
+}
+
+TBIRES trigger_handler(TBIRES State, int SigNum, int Triggers, int Inst,
+                      PTBI pTBI)
+{
+       head_end(State, ~INTS_OFF_MASK);
+
+       /* If we interrupted user code handle any critical sections. */
+       if (State.Sig.SaveMask & TBICTX_PRIV_BIT)
+               _restart_critical_section(State);
+
+       trace_hardirqs_off();
+
+       do_IRQ(SigNum, (struct pt_regs *)State.Sig.pCtx);
+
+       return tail_end(State);
+}
+
+static unsigned int load_fault(PTBICTXEXTCB0 pbuf)
+{
+       return pbuf->CBFlags & TXCATCH0_READ_BIT;
+}
+
+static unsigned long fault_address(PTBICTXEXTCB0 pbuf)
+{
+       return pbuf->CBAddr;
+}
+
+static void unhandled_fault(struct pt_regs *regs, unsigned long addr,
+                           int signo, int code, int trapno)
+{
+       if (user_mode(regs)) {
+               siginfo_t info;
+
+               if (show_unhandled_signals && unhandled_signal(current, signo)
+                   && printk_ratelimit()) {
+
+                       pr_info("pid %d unhandled fault: pc 0x%08x, addr 0x%08lx, trap %d (%s)\n",
+                               current->pid, regs->ctx.CurrPC, addr,
+                               trapno, trap_name(trapno));
+                       print_vma_addr(" in ", regs->ctx.CurrPC);
+                       print_vma_addr(" rtp in ", regs->ctx.DX[4].U1);
+                       printk("\n");
+                       show_regs(regs);
+               }
+
+               info.si_signo = signo;
+               info.si_errno = 0;
+               info.si_code = code;
+               info.si_addr = (__force void __user *)addr;
+               info.si_trapno = trapno;
+               force_sig_info(signo, &info, current);
+       } else {
+               die("Oops", regs, trapno, addr);
+       }
+}
+
+static int handle_data_fault(PTBICTXEXTCB0 pcbuf, struct pt_regs *regs,
+                            unsigned int data_address, int trapno)
+{
+       int ret;
+
+       ret = do_page_fault(regs, data_address, !load_fault(pcbuf), trapno);
+
+       return ret;
+}
+
+static unsigned long get_inst_fault_address(struct pt_regs *regs)
+{
+       return regs->ctx.CurrPC;
+}
+
+TBIRES fault_handler(TBIRES State, int SigNum, int Triggers,
+                    int Inst, PTBI pTBI)
+{
+       struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+       PTBICTXEXTCB0 pcbuf = (PTBICTXEXTCB0)&regs->extcb0;
+       unsigned long data_address;
+
+       head_end(State, ~INTS_OFF_MASK);
+
+       /* Hardware breakpoint or data watch */
+       if ((SigNum == TBIXXF_SIGNUM_IHF) ||
+           ((SigNum == TBIXXF_SIGNUM_DHF) &&
+            (pcbuf[0].CBFlags & (TXCATCH0_WATCH1_BIT |
+                                 TXCATCH0_WATCH0_BIT)))) {
+               State = __TBIUnExpXXX(State, SigNum, Triggers, Inst,
+                                     pTBI);
+               return tail_end(State);
+       }
+
+       local_irq_enable();
+
+       data_address = fault_address(pcbuf);
+
+       switch (SigNum) {
+       case TBIXXF_SIGNUM_IGF:
+               /* 1st-level entry invalid (instruction fetch) */
+       case TBIXXF_SIGNUM_IPF: {
+               /* 2nd-level entry invalid (instruction fetch) */
+               unsigned long addr = get_inst_fault_address(regs);
+               do_page_fault(regs, addr, 0, SigNum);
+               break;
+       }
+
+       case TBIXXF_SIGNUM_DGF:
+               /* 1st-level entry invalid (data access) */
+       case TBIXXF_SIGNUM_DPF:
+               /* 2nd-level entry invalid (data access) */
+       case TBIXXF_SIGNUM_DWF:
+               /* Write to read only page */
+               handle_data_fault(pcbuf, regs, data_address, SigNum);
+               break;
+
+       case TBIXXF_SIGNUM_IIF:
+               /* Illegal instruction */
+               unhandled_fault(regs, regs->ctx.CurrPC, SIGILL, ILL_ILLOPC,
+                               SigNum);
+               break;
+
+       case TBIXXF_SIGNUM_DHF:
+               /* Unaligned access */
+               unhandled_fault(regs, data_address, SIGBUS, BUS_ADRALN,
+                               SigNum);
+               break;
+       case TBIXXF_SIGNUM_PGF:
+               /* Privilege violation */
+               unhandled_fault(regs, data_address, SIGSEGV, SEGV_ACCERR,
+                               SigNum);
+               break;
+       default:
+               BUG();
+               break;
+       }
+
+       return tail_end(State);
+}
+
+static bool switch_is_syscall(unsigned int inst)
+{
+       return inst == __METAG_SW_ENCODING(SYS);
+}
+
+static bool switch_is_legacy_syscall(unsigned int inst)
+{
+       return inst == __METAG_SW_ENCODING(SYS_LEGACY);
+}
+
+static inline void step_over_switch(struct pt_regs *regs, unsigned int inst)
+{
+       regs->ctx.CurrPC += 4;
+}
+
+static inline int test_syscall_work(void)
+{
+       return current_thread_info()->flags & _TIF_WORK_SYSCALL_MASK;
+}
+
+TBIRES switch1_handler(TBIRES State, int SigNum, int Triggers,
+                      int Inst, PTBI pTBI)
+{
+       struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+       unsigned int sysnumber;
+       unsigned long long a1_a2, a3_a4, a5_a6;
+       LPSYSCALL syscall_entry;
+       int restart;
+
+       head_end(State, ~INTS_OFF_MASK);
+
+       /*
+        * If this is not a syscall SWITCH it could be a breakpoint.
+        */
+       if (!switch_is_syscall(Inst)) {
+               /*
+                * Alert the user if they're trying to use legacy system
+                * calls. This suggests they need to update their C
+                * library and build against up-to-date kernel headers.
+                */
+               if (switch_is_legacy_syscall(Inst))
+                       pr_warn_once("WARNING: A legacy syscall was made. Your userland needs updating.\n");
+               /*
+                * We don't know how to handle the SWITCH and cannot
+                * safely ignore it, so treat all unknown switches
+                * (including breakpoints) as traps.
+                */
+               force_sig(SIGTRAP, current);
+               return tail_end(State);
+       }
+
+       local_irq_enable();
+
+restart_syscall:
+       restart = 0;
+       sysnumber = regs->ctx.DX[0].U1;
+
+       if (test_syscall_work())
+               sysnumber = syscall_trace_enter(regs);
+
+       /* Skip over the SWITCH instruction - or you just get 'stuck' on it! */
+       step_over_switch(regs, Inst);
+
+       if (sysnumber >= __NR_syscalls) {
+               pr_debug("unknown syscall number: %d\n", sysnumber);
+               syscall_entry = (LPSYSCALL) sys_ni_syscall;
+       } else {
+               syscall_entry = (LPSYSCALL) sys_call_table[sysnumber];
+       }
+
+       /* Use 64bit loads for speed. */
+       a5_a6 = *(unsigned long long *)&regs->ctx.DX[1];
+       a3_a4 = *(unsigned long long *)&regs->ctx.DX[2];
+       a1_a2 = *(unsigned long long *)&regs->ctx.DX[3];
+
+       /* here is the actual call to the syscall handler functions */
+       regs->ctx.DX[0].U0 = syscall_entry(a1_a2, a3_a4, a5_a6);
+
+       if (test_syscall_work())
+               syscall_trace_leave(regs);
+
+       State = tail_end_sys(State, sysnumber, &restart);
+       /* Handlerless restarts shouldn't go via userland */
+       if (restart)
+               goto restart_syscall;
+       return State;
+}
+
+TBIRES switchx_handler(TBIRES State, int SigNum, int Triggers,
+                      int Inst, PTBI pTBI)
+{
+       struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+
+       /*
+        * This can be caused by any user process simply executing an unusual
+        * SWITCH instruction. If there's no DA, __TBIUnExpXXX will cause the
+        * thread to stop, so signal a SIGTRAP instead.
+        */
+       head_end(State, ~INTS_OFF_MASK);
+       if (user_mode(regs))
+               force_sig(SIGTRAP, current);
+       else
+               State = __TBIUnExpXXX(State, SigNum, Triggers, Inst, pTBI);
+       return tail_end(State);
+}
+
+#ifdef CONFIG_METAG_META21
+TBIRES fpe_handler(TBIRES State, int SigNum, int Triggers, int Inst, PTBI pTBI)
+{
+       struct pt_regs *regs = (struct pt_regs *)State.Sig.pCtx;
+       unsigned int error_state = Triggers;
+       siginfo_t info;
+
+       head_end(State, ~INTS_OFF_MASK);
+
+       local_irq_enable();
+
+       info.si_signo = SIGFPE;
+
+       if (error_state & TXSTAT_FPE_INVALID_BIT)
+               info.si_code = FPE_FLTINV;
+       else if (error_state & TXSTAT_FPE_DIVBYZERO_BIT)
+               info.si_code = FPE_FLTDIV;
+       else if (error_state & TXSTAT_FPE_OVERFLOW_BIT)
+               info.si_code = FPE_FLTOVF;
+       else if (error_state & TXSTAT_FPE_UNDERFLOW_BIT)
+               info.si_code = FPE_FLTUND;
+       else if (error_state & TXSTAT_FPE_INEXACT_BIT)
+               info.si_code = FPE_FLTRES;
+       else
+               info.si_code = 0;
+       info.si_errno = 0;
+       info.si_addr = (__force void __user *)regs->ctx.CurrPC;
+       force_sig_info(SIGFPE, &info, current);
+
+       return tail_end(State);
+}
+#endif
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+struct traps_context {
+       PTBIAPIFN fnSigs[TBID_SIGNUM_MAX + 1];
+};
+
+static struct traps_context *metag_traps_context;
+
+int traps_save_context(void)
+{
+       unsigned long cpu = smp_processor_id();
+       PTBI _pTBI = per_cpu(pTBI, cpu);
+       struct traps_context *context;
+
+       context = kzalloc(sizeof(*context), GFP_ATOMIC);
+       if (!context)
+               return -ENOMEM;
+
+       memcpy(context->fnSigs, (void *)_pTBI->fnSigs, sizeof(context->fnSigs));
+
+       metag_traps_context = context;
+       return 0;
+}
+
+int traps_restore_context(void)
+{
+       unsigned long cpu = smp_processor_id();
+       PTBI _pTBI = per_cpu(pTBI, cpu);
+       struct traps_context *context = metag_traps_context;
+
+       metag_traps_context = NULL;
+
+       memcpy((void *)_pTBI->fnSigs, context->fnSigs, sizeof(context->fnSigs));
+
+       kfree(context);
+       return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+static inline unsigned int _get_trigger_mask(void)
+{
+       unsigned long cpu = smp_processor_id();
+       return per_cpu(trigger_mask, cpu);
+}
+
+unsigned int get_trigger_mask(void)
+{
+       return _get_trigger_mask();
+}
+EXPORT_SYMBOL(get_trigger_mask);
+
+static void set_trigger_mask(unsigned int mask)
+{
+       unsigned long cpu = smp_processor_id();
+       per_cpu(trigger_mask, cpu) = mask;
+}
+
+void arch_local_irq_enable(void)
+{
+       preempt_disable();
+       arch_local_irq_restore(_get_trigger_mask());
+       preempt_enable_no_resched();
+}
+EXPORT_SYMBOL(arch_local_irq_enable);
+#else
+static void set_trigger_mask(unsigned int mask)
+{
+       global_trigger_mask = mask;
+}
+#endif
+
+void __cpuinit per_cpu_trap_init(unsigned long cpu)
+{
+       TBIRES int_context;
+       unsigned int thread = cpu_2_hwthread_id[cpu];
+
+       set_trigger_mask(TBI_INTS_INIT(thread) | /* interrupts */
+                        TBI_TRIG_BIT(TBID_SIGNUM_LWK) | /* low level kick */
+                        TBI_TRIG_BIT(TBID_SIGNUM_SW1) |
+                        TBI_TRIG_BIT(TBID_SIGNUM_SWS));
+
+       /* non-priv - use current stack */
+       int_context.Sig.pCtx = NULL;
+       /* Start with interrupts off */
+       int_context.Sig.TrigMask = INTS_OFF_MASK;
+       int_context.Sig.SaveMask = 0;
+
+       /* And call __TBIASyncTrigger() */
+       __TBIASyncTrigger(int_context);
+}
+
+void __init trap_init(void)
+{
+       unsigned long cpu = smp_processor_id();
+       PTBI _pTBI = per_cpu(pTBI, cpu);
+
+       _pTBI->fnSigs[TBID_SIGNUM_XXF] = fault_handler;
+       _pTBI->fnSigs[TBID_SIGNUM_SW0] = switchx_handler;
+       _pTBI->fnSigs[TBID_SIGNUM_SW1] = switch1_handler;
+       _pTBI->fnSigs[TBID_SIGNUM_SW2] = switchx_handler;
+       _pTBI->fnSigs[TBID_SIGNUM_SW3] = switchx_handler;
+       _pTBI->fnSigs[TBID_SIGNUM_SWK] = kick_handler;
+
+#ifdef CONFIG_METAG_META21
+       _pTBI->fnSigs[TBID_SIGNUM_DFR] = __TBIHandleDFR;
+       _pTBI->fnSigs[TBID_SIGNUM_FPE] = fpe_handler;
+#endif
+
+       per_cpu_trap_init(cpu);
+}
+
+void tbi_startup_interrupt(int irq)
+{
+       unsigned long cpu = smp_processor_id();
+       PTBI _pTBI = per_cpu(pTBI, cpu);
+
+       BUG_ON(irq > TBID_SIGNUM_MAX);
+
+       /* For TR1 and TR2, the thread id is encoded in the irq number */
+       if (irq >= TBID_SIGNUM_T10 && irq < TBID_SIGNUM_TR3)
+               cpu = hwthread_id_2_cpu[(irq - TBID_SIGNUM_T10) % 4];
+
+       set_trigger_mask(get_trigger_mask() | TBI_TRIG_BIT(irq));
+
+       _pTBI->fnSigs[irq] = trigger_handler;
+}
+
+void tbi_shutdown_interrupt(int irq)
+{
+       unsigned long cpu = smp_processor_id();
+       PTBI _pTBI = per_cpu(pTBI, cpu);
+
+       BUG_ON(irq > TBID_SIGNUM_MAX);
+
+       set_trigger_mask(get_trigger_mask() & ~TBI_TRIG_BIT(irq));
+
+       _pTBI->fnSigs[irq] = __TBIUnExpXXX;
+}
+
+int ret_from_fork(TBIRES arg)
+{
+       struct task_struct *prev = arg.Switch.pPara;
+       struct task_struct *tsk = current;
+       struct pt_regs *regs = task_pt_regs(tsk);
+       int (*fn)(void *);
+       TBIRES Next;
+
+       schedule_tail(prev);
+
+       if (tsk->flags & PF_KTHREAD) {
+               fn = (void *)regs->ctx.DX[4].U1;
+               BUG_ON(!fn);
+
+               fn((void *)regs->ctx.DX[3].U1);
+       }
+
+       if (test_syscall_work())
+               syscall_trace_leave(regs);
+
+       preempt_disable();
+
+       Next.Sig.TrigMask = get_trigger_mask();
+       Next.Sig.SaveMask = 0;
+       Next.Sig.pCtx = &regs->ctx;
+
+       set_gateway_tls(current->thread.tls_ptr);
+
+       preempt_enable_no_resched();
+
+       /* And interrupts should come back on when we resume the real usermode
+        * code. Call __TBIASyncResume()
+        */
+       __TBIASyncResume(tail_end(Next));
+       /* ASyncResume should NEVER return */
+       BUG();
+       return 0;
+}
+
+void show_trace(struct task_struct *tsk, unsigned long *sp,
+               struct pt_regs *regs)
+{
+       unsigned long addr;
+#ifdef CONFIG_FRAME_POINTER
+       unsigned long fp, fpnew;
+       unsigned long stack;
+#endif
+
+       if (regs && user_mode(regs))
+               return;
+
+       printk("\nCall trace: ");
+#ifdef CONFIG_KALLSYMS
+       printk("\n");
+#endif
+
+       if (!tsk)
+               tsk = current;
+
+#ifdef CONFIG_FRAME_POINTER
+       if (regs) {
+               print_ip_sym(regs->ctx.CurrPC);
+               fp = regs->ctx.AX[1].U0;
+       } else {
+               fp = __core_reg_get(A0FrP);
+       }
+
+       /* Detect when the frame pointer has been used for other purposes and
+        * no longer points to the stack (it may point somewhere else entirely,
+        * which kstack_end may not detect).
+        */
+       stack = (unsigned long)task_stack_page(tsk);
+       while (fp >= stack && fp + 8 <= stack + THREAD_SIZE) {
+               addr = __raw_readl((unsigned long *)(fp + 4)) - 4;
+               if (kernel_text_address(addr))
+                       print_ip_sym(addr);
+               else
+                       break;
+               /* stack grows up, so frame pointers must decrease */
+               fpnew = __raw_readl((unsigned long *)(fp + 0));
+               if (fpnew >= fp)
+                       break;
+               fp = fpnew;
+       }
+#else
+       while (!kstack_end(sp)) {
+               addr = (*sp--) - 4;
+               if (kernel_text_address(addr))
+                       print_ip_sym(addr);
+       }
+#endif
+
+       printk("\n");
+
+       debug_show_held_locks(tsk);
+}
+
+void show_stack(struct task_struct *tsk, unsigned long *sp)
+{
+       if (!tsk)
+               tsk = current;
+       if (tsk == current)
+               sp = (unsigned long *)current_stack_pointer;
+       else
+               sp = (unsigned long *)tsk->thread.kernel_context->AX[0].U0;
+
+       show_trace(tsk, sp, NULL);
+}
+
+void dump_stack(void)
+{
+       show_stack(NULL, NULL);
+}
+EXPORT_SYMBOL(dump_stack);
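The TXDEFR handling in head_end() and tail_end_sys() above keeps one bit of LNKSET success/failure state per interrupt nesting level, so the interrupted code sees the correct bus state when it resumes. A minimal standalone sketch of that bookkeeping (plain C; the struct and function names are illustrative only and not part of the patch):

/* Illustrative sketch of the per-depth LNKSET failure bookkeeping. */
struct lnkset_state {
	unsigned int int_depth;		/* current interrupt nesting level */
	unsigned int txdefr_failure;	/* bit n set => failure recorded at depth n */
};

/* On interrupt entry: remember whether the interrupted LNKSET had failed. */
static void record_on_entry(struct lnkset_state *s, int lnkset_failed)
{
	unsigned int depth = s->int_depth++;

	if (lnkset_failed)
		s->txdefr_failure |= 1u << depth;
	else
		s->txdefr_failure &= ~(1u << depth);
}

/* On return: report which result should be written back into TXDEFR. */
static int failure_on_exit(struct lnkset_state *s)
{
	unsigned int depth = --s->int_depth;

	return (s->txdefr_failure >> depth) & 1;
}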
diff --git a/arch/metag/kernel/user_gateway.S b/arch/metag/kernel/user_gateway.S
new file mode 100644 (file)
index 0000000..7167f3e
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2010 Imagination Technologies Ltd.
+ *
+ * This file contains code that can be accessed from userspace and can
+ * access certain kernel data structures without the overhead of a system
+ * call.
+ */
+
+#include <asm/metag_regs.h>
+#include <asm/user_gateway.h>
+
+/*
+ * User helpers.
+ *
+ * These are segments of kernel-provided user code reachable from user space
+ * at a fixed address in kernel memory.  They are used to provide user space
+ * with some operations which require kernel help because of unimplemented
+ * native features and/or instructions in some Meta CPUs.  The idea is for
+ * this code to be executed directly in user mode for best efficiency, but
+ * it is too intimate with the kernel counterpart to be left to user
+ * libraries.  The kernel reserves the right to change this code as needed
+ * without warning.  Only the entry points and their results are guaranteed
+ * to be stable.
+ *
+ * Each segment is 64-byte aligned.  This mechanism should be used only for
+ * things that are really small and justified, and must not be abused freely.
+ */
+       .text
+       .global ___user_gateway_start
+___user_gateway_start:
+
+       /* get_tls
+        * Offset:       0
+        * Description:  Get the TLS pointer for this process.
+        */
+       .global ___kuser_get_tls
+       .type   ___kuser_get_tls,function
+___kuser_get_tls:
+       MOVT    D1Ar1,#HI(USER_GATEWAY_PAGE + USER_GATEWAY_TLS)
+       ADD     D1Ar1,D1Ar1,#LO(USER_GATEWAY_PAGE + USER_GATEWAY_TLS)
+       MOV     D1Ar3,TXENABLE
+       AND     D1Ar3,D1Ar3,#(TXENABLE_THREAD_BITS)
+       LSR     D1Ar3,D1Ar3,#(TXENABLE_THREAD_S - 2)
+       GETD    D0Re0,[D1Ar1+D1Ar3]
+___kuser_get_tls_end:          /* Beyond this point the read will complete */
+       MOV     PC,D1RtP
+       .size   ___kuser_get_tls,.-___kuser_get_tls
+       .global ___kuser_get_tls_end
+
+       /* cmpxchg
+        * Offset:       64
+        * Description:  Replace the value at 'ptr' with 'newval' if the current
+        *               value is 'oldval'. Return zero if we succeeded,
+        *               non-zero otherwise.
+        *
+        * Reference prototype:
+        *
+        *      int __kuser_cmpxchg(int oldval, int newval, unsigned long *ptr)
+        *
+        */
+       .balign 64
+       .global ___kuser_cmpxchg
+       .type   ___kuser_cmpxchg,function
+___kuser_cmpxchg:
+#ifdef CONFIG_SMP
+       /*
+        * We must use LNKGET/LNKSET with an SMP kernel because the other method
+        * does not provide atomicity across multiple CPUs.
+        */
+0:     LNKGETD D0Re0,[D1Ar3]
+       CMP     D0Re0,D1Ar1
+       LNKSETDZ [D1Ar3],D0Ar2
+       BNZ     1f
+       DEFR    D0Re0,TXSTAT
+       ANDT    D0Re0,D0Re0,#HI(0x3f000000)
+       CMPT    D0Re0,#HI(0x02000000)
+       BNE     0b
+#ifdef CONFIG_METAG_LNKGET_AROUND_CACHE
+       DCACHE  [D1Ar3], D0Re0
+#endif
+1:     MOV     D0Re0,#1
+       XORZ    D0Re0,D0Re0,D0Re0
+       MOV     PC,D1RtP
+#else
+       GETD    D0Re0,[D1Ar3]
+       CMP     D0Re0,D1Ar1
+       SETDZ   [D1Ar3],D0Ar2
+___kuser_cmpxchg_end:          /* Beyond this point the write will complete */
+       MOV     D0Re0,#1
+       XORZ    D0Re0,D0Re0,D0Re0
+       MOV     PC,D1RtP
+#endif /* CONFIG_SMP */
+       .size   ___kuser_cmpxchg,.-___kuser_cmpxchg
+       .global ___kuser_cmpxchg_end
+
+       .global ___user_gateway_end
+___user_gateway_end:
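To illustrate how these helpers are meant to be reached, here is a rough userspace-side sketch based on the comment block above: the gateway page is mapped at a fixed address and get_tls/cmpxchg live at offsets 0 and 64 within it. The GATEWAY_BASE value and wrapper names below are purely illustrative assumptions, not part of the kernel ABI as described here:

/* Hypothetical userspace wrappers for the gateway helpers above. */
#define GATEWAY_BASE	0x6ffff000UL	/* assumed fixed mapping address */

typedef void *(*kuser_get_tls_fn)(void);
typedef int (*kuser_cmpxchg_fn)(int oldval, int newval, unsigned long *ptr);

static inline void *gateway_get_tls(void)
{
	/* get_tls lives at offset 0 of the gateway page */
	return ((kuser_get_tls_fn)(GATEWAY_BASE + 0))();
}

static inline int gateway_cmpxchg(int oldval, int newval, unsigned long *ptr)
{
	/* cmpxchg lives at offset 64; returns zero on success */
	return ((kuser_cmpxchg_fn)(GATEWAY_BASE + 64))(oldval, newval, ptr);
}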
diff --git a/arch/metag/kernel/vmlinux.lds.S b/arch/metag/kernel/vmlinux.lds.S
new file mode 100644 (file)
index 0000000..e12055e
--- /dev/null
@@ -0,0 +1,71 @@
+/* ld script to make Meta Linux kernel */
+
+#include <asm/thread_info.h>
+#include <asm/page.h>
+#include <asm/cache.h>
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf32-metag", "elf32-metag", "elf32-metag")
+OUTPUT_ARCH(metag)
+ENTRY(__start)
+
+_jiffies = _jiffies_64;
+SECTIONS
+{
+  . = CONFIG_PAGE_OFFSET;
+  _text = .;
+  __text = .;
+  __stext = .;
+  HEAD_TEXT_SECTION
+  .text : {
+       TEXT_TEXT
+       SCHED_TEXT
+       LOCK_TEXT
+       KPROBES_TEXT
+       IRQENTRY_TEXT
+       *(.text.*)
+       *(.gnu.warning)
+       }
+
+  __etext = .;                 /* End of text section */
+
+  __sdata = .;
+  RO_DATA_SECTION(PAGE_SIZE)
+  RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+  __edata = .;                 /* End of data section */
+
+  EXCEPTION_TABLE(16)
+  NOTES
+
+  . = ALIGN(PAGE_SIZE);                /* Init code and data */
+  ___init_begin = .;
+  INIT_TEXT_SECTION(PAGE_SIZE)
+  INIT_DATA_SECTION(16)
+
+  .init.arch.info : {
+         ___arch_info_begin = .;
+         *(.arch.info.init)
+         ___arch_info_end = .;
+  }
+
+  PERCPU_SECTION(L1_CACHE_BYTES)
+
+  ___init_end = .;
+
+  BSS_SECTION(0, PAGE_SIZE, 0)
+
+  __end = .;
+
+  . = ALIGN(PAGE_SIZE);
+  __heap_start = .;
+
+  DWARF_DEBUG
+
+  /* When something in the kernel is NOT compiled as a module, the
+   * module cleanup code and data are put into these segments.  Both
+   * can then be thrown away, as cleanup code is never called unless
+   * it's a module.
+   */
+  DISCARDS
+}
diff --git a/arch/metag/lib/Makefile b/arch/metag/lib/Makefile
new file mode 100644 (file)
index 0000000..a41d24e
--- /dev/null
@@ -0,0 +1,22 @@
+#
+# Makefile for Meta-specific library files.
+#
+
+lib-y += usercopy.o
+lib-y += copy_page.o
+lib-y += clear_page.o
+lib-y += memcpy.o
+lib-y += memmove.o
+lib-y += memset.o
+lib-y += delay.o
+lib-y += div64.o
+lib-y += muldi3.o
+lib-y += ashrdi3.o
+lib-y += ashldi3.o
+lib-y += lshrdi3.o
+lib-y += divsi3.o
+lib-y += modsi3.o
+lib-y += cmpdi2.o
+lib-y += ucmpdi2.o
+lib-y += ip_fast_csum.o
+lib-y += checksum.o
diff --git a/arch/metag/lib/ashldi3.S b/arch/metag/lib/ashldi3.S
new file mode 100644 (file)
index 0000000..78d6974
--- /dev/null
@@ -0,0 +1,33 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit arithmetic shift left routine.
+!
+
+       .text
+       .global ___ashldi3
+       .type   ___ashldi3,function
+
+___ashldi3:
+       MOV     D0Re0,D0Ar2
+       MOV     D1Re0,D1Ar1
+       CMP     D1Ar3,#0                ! COUNT == 0
+       MOVEQ   PC,D1RtP                ! Yes, return
+
+       SUBS    D0Ar4,D1Ar3,#32         ! N = COUNT - 32
+       BGE     $L10
+
+!! Shift < 32
+       NEG     D0Ar4,D0Ar4             ! N = - N
+       LSL     D1Re0,D1Re0,D1Ar3       ! HI = HI << COUNT
+       LSR     D0Ar6,D0Re0,D0Ar4       ! TMP= LO >> -(COUNT - 32)
+       OR      D1Re0,D1Re0,D0Ar6       ! HI = HI | TMP
+       SWAP    D0Ar4,D1Ar3
+       LSL     D0Re0,D0Re0,D0Ar4       ! LO = LO << COUNT
+       MOV     PC,D1RtP
+
+$L10:
+!! Shift >= 32
+       LSL     D1Re0,D0Re0,D0Ar4       ! HI = LO << N
+       MOV     D0Re0,#0                ! LO = 0
+       MOV     PC,D1RtP
+       .size ___ashldi3,.-___ashldi3
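The routine splits the shift into the count < 32 and count >= 32 cases, combining the high and low 32-bit halves with complementary shifts. A portable C sketch of the same algorithm (illustrative only; it ignores the Meta register calling convention):

/* Rough C equivalent of the 64-bit shift-left above. */
unsigned long long ashldi3_sketch(unsigned long long v, unsigned int count)
{
	unsigned int lo = (unsigned int)v;
	unsigned int hi = (unsigned int)(v >> 32);

	if (count == 0)
		return v;
	if (count < 32) {
		hi = (hi << count) | (lo >> (32 - count));	/* HI = HI << COUNT | LO >> (32 - COUNT) */
		lo <<= count;					/* LO = LO << COUNT */
	} else {
		hi = lo << (count - 32);			/* HI = LO << (COUNT - 32) */
		lo = 0;						/* LO = 0 */
	}
	return ((unsigned long long)hi << 32) | lo;
}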
diff --git a/arch/metag/lib/ashrdi3.S b/arch/metag/lib/ashrdi3.S
new file mode 100644 (file)
index 0000000..7cb7ed3
--- /dev/null
@@ -0,0 +1,33 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit arithmetic shift right routine.
+!
+
+       .text
+       .global ___ashrdi3
+       .type   ___ashrdi3,function
+
+___ashrdi3:
+       MOV     D0Re0,D0Ar2
+       MOV     D1Re0,D1Ar1
+       CMP     D1Ar3,#0                ! COUNT == 0
+       MOVEQ   PC,D1RtP                ! Yes, return
+
+       MOV     D0Ar4,D1Ar3
+       SUBS    D1Ar3,D1Ar3,#32         ! N = COUNT - 32
+       BGE     $L20
+
+!! Shift < 32
+       NEG     D1Ar3,D1Ar3             ! N = - N
+       LSR     D0Re0,D0Re0,D0Ar4       ! LO = LO >> COUNT
+       LSL     D0Ar6,D1Re0,D1Ar3       ! TMP= HI << -(COUNT - 32)
+       OR      D0Re0,D0Re0,D0Ar6       ! LO = LO | TMP
+       SWAP    D1Ar3,D0Ar4
+       ASR     D1Re0,D1Re0,D1Ar3       ! HI = HI >> COUNT
+       MOV     PC,D1RtP
+$L20:
+!! Shift >= 32
+       ASR     D0Re0,D1Re0,D1Ar3       ! LO = HI >> N
+       ASR     D1Re0,D1Re0,#31         ! HI = HI >> 31
+       MOV     PC,D1RtP
+       .size ___ashrdi3,.-___ashrdi3
diff --git a/arch/metag/lib/checksum.c b/arch/metag/lib/checksum.c
new file mode 100644 (file)
index 0000000..44d2e19
--- /dev/null
@@ -0,0 +1,168 @@
+/*
+ *
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             IP/TCP/UDP checksumming routines
+ *
+ * Authors:    Jorge Cwik, <jorge@laser.satlink.net>
+ *             Arnt Gulbrandsen, <agulbra@nvg.unit.no>
+ *             Tom May, <ftom@netcom.com>
+ *             Andreas Schwab, <schwab@issan.informatik.uni-dortmund.de>
+ *             Lots of code moved from tcp.c and ip.c; see those files
+ *             for more names.
+ *
+ * 03/02/96    Jes Sorensen, Andreas Schwab, Roman Hodek:
+ *             Fixed some nasty bugs, causing some horrible crashes.
+ *             A: At some points, the sum (%0) was used as
+ *             length-counter instead of the length counter
+ *             (%1). Thanks to Roman Hodek for pointing this out.
+ *             B: GCC seems to mess up if one uses too many
+ *             data-registers to hold input values and one tries to
+ *             specify d0 and d1 as scratch registers. Letting gcc
+ *             choose these registers itself solves the problem.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+
+/* Revised by Kenneth Albanowski for m68knommu. Basic problem: unaligned access
+ kills, so most of the assembly has to go. */
+
+#include <linux/module.h>
+#include <net/checksum.h>
+
+#include <asm/byteorder.h>
+
+static inline unsigned short from32to16(unsigned int x)
+{
+       /* add up 16-bit and 16-bit for 16+c bit */
+       x = (x & 0xffff) + (x >> 16);
+       /* add up carry.. */
+       x = (x & 0xffff) + (x >> 16);
+       return x;
+}
+
+static unsigned int do_csum(const unsigned char *buff, int len)
+{
+       int odd;
+       unsigned int result = 0;
+
+       if (len <= 0)
+               goto out;
+       odd = 1 & (unsigned long) buff;
+       if (odd) {
+#ifdef __LITTLE_ENDIAN
+               result += (*buff << 8);
+#else
+               result = *buff;
+#endif
+               len--;
+               buff++;
+       }
+       if (len >= 2) {
+               if (2 & (unsigned long) buff) {
+                       result += *(unsigned short *) buff;
+                       len -= 2;
+                       buff += 2;
+               }
+               if (len >= 4) {
+                       const unsigned char *end = buff + ((unsigned)len & ~3);
+                       unsigned int carry = 0;
+                       do {
+                               unsigned int w = *(unsigned int *) buff;
+                               buff += 4;
+                               result += carry;
+                               result += w;
+                               carry = (w > result);
+                       } while (buff < end);
+                       result += carry;
+                       result = (result & 0xffff) + (result >> 16);
+               }
+               if (len & 2) {
+                       result += *(unsigned short *) buff;
+                       buff += 2;
+               }
+       }
+       if (len & 1)
+#ifdef __LITTLE_ENDIAN
+               result += *buff;
+#else
+               result += (*buff << 8);
+#endif
+       result = from32to16(result);
+       if (odd)
+               result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+       return result;
+}
+EXPORT_SYMBOL(ip_fast_csum);
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum csum_partial(const void *buff, int len, __wsum wsum)
+{
+       unsigned int sum = (__force unsigned int)wsum;
+       unsigned int result = do_csum(buff, len);
+
+       /* add in old sum, and carry.. */
+       result += sum;
+       if (sum > result)
+               result += 1;
+       return (__force __wsum)result;
+}
+EXPORT_SYMBOL(csum_partial);
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+__sum16 ip_compute_csum(const void *buff, int len)
+{
+       return (__force __sum16)~do_csum(buff, len);
+}
+EXPORT_SYMBOL(ip_compute_csum);
+
+/*
+ * copy from fs while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy_from_user(const void __user *src, void *dst, int len,
+                                               __wsum sum, int *csum_err)
+{
+       int missing;
+
+       missing = __copy_from_user(dst, src, len);
+       if (missing) {
+               memset(dst + len - missing, 0, missing);
+               *csum_err = -EFAULT;
+       } else
+               *csum_err = 0;
+
+       return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
+/*
+ * copy from ds while checksumming, otherwise like csum_partial
+ */
+__wsum
+csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
+{
+       memcpy(dst, src, len);
+       return csum_partial(dst, len, sum);
+}
+EXPORT_SYMBOL(csum_partial_copy);
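As the comment above notes, csum_partial() returns a 32-bit running sum that can be fed back into itself, with only the last fragment allowed an odd length. A small usage sketch (csum_fold() from the generic checksum helpers produces the final 16-bit value; the function name below is illustrative):

/* Checksum a buffer supplied in two fragments (sketch). */
static __sum16 checksum_two_fragments(const void *p1, int len1,
				      const void *p2, int len2)
{
	__wsum sum;

	sum = csum_partial(p1, len1, 0);	/* len1 should be even */
	sum = csum_partial(p2, len2, sum);	/* feed the running sum back in */
	return csum_fold(sum);			/* fold to the final 16-bit checksum */
}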
diff --git a/arch/metag/lib/clear_page.S b/arch/metag/lib/clear_page.S
new file mode 100644 (file)
index 0000000..43144ee
--- /dev/null
@@ -0,0 +1,17 @@
+        ! Copyright 2007,2008,2009 Imagination Technologies Ltd.
+
+#include <asm/page.h>
+
+        .text
+        .global        _clear_page
+        .type   _clear_page,function
+       !! D1Ar1 - page
+_clear_page:
+       MOV  TXRPT,#((PAGE_SIZE / 8) - 1)
+       MOV  D0Re0,#0
+       MOV  D1Re0,#0
+$Lclear_page_loop:
+       SETL [D1Ar1++],D0Re0,D1Re0
+       BR   $Lclear_page_loop
+       MOV  PC,D1RtP
+        .size  _clear_page,.-_clear_page
diff --git a/arch/metag/lib/cmpdi2.S b/arch/metag/lib/cmpdi2.S
new file mode 100644 (file)
index 0000000..9c5c663
--- /dev/null
@@ -0,0 +1,32 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit signed compare routine.
+!
+
+       .text
+       .global ___cmpdi2
+       .type   ___cmpdi2,function
+
+!         low    high
+! s64 a  (D0Ar2, D1Ar1)
+! s64 b  (D0Ar4, D1Ar3)
+___cmpdi2:
+       ! start at 1 (equal) and conditionally increment or decrement
+       MOV     D0Re0,#1
+
+       ! high words differ?
+       CMP     D1Ar1,D1Ar3
+       BNE     $Lhigh_differ
+
+       ! unsigned compare low words
+       CMP     D0Ar2,D0Ar4
+       SUBLO   D0Re0,D0Re0,#1
+       ADDHI   D0Re0,D0Re0,#1
+       MOV     PC,D1RtP
+
+$Lhigh_differ:
+       ! signed compare high words
+       SUBLT   D0Re0,D0Re0,#1
+       ADDGT   D0Re0,D0Re0,#1
+       MOV     PC,D1RtP
+       .size ___cmpdi2,.-___cmpdi2
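For reference, this follows the usual libgcc __cmpdi2 convention (assumed here): return 0 when a < b, 1 when a == b, 2 when a > b, comparing the high words as signed and the low words as unsigned. The same logic in C:

/* Rough C equivalent of ___cmpdi2 above (0/1/2 result). */
int cmpdi2_sketch(long long a, long long b)
{
	int a_hi = (int)(a >> 32), b_hi = (int)(b >> 32);
	unsigned int a_lo = (unsigned int)a, b_lo = (unsigned int)b;

	if (a_hi != b_hi)			/* high words differ: signed compare */
		return a_hi < b_hi ? 0 : 2;
	if (a_lo < b_lo)			/* low words: unsigned compare */
		return 0;
	if (a_lo > b_lo)
		return 2;
	return 1;				/* equal */
}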
diff --git a/arch/metag/lib/copy_page.S b/arch/metag/lib/copy_page.S
new file mode 100644 (file)
index 0000000..91f7d46
--- /dev/null
@@ -0,0 +1,20 @@
+        ! Copyright 2007,2008 Imagination Technologies Ltd.
+
+#include <asm/page.h>
+
+        .text
+        .global        _copy_page
+        .type   _copy_page,function
+       !! D1Ar1 - to
+       !! D0Ar2 - from
+_copy_page:
+       MOV  D0FrT,#PAGE_SIZE
+$Lcopy_page_loop:
+       GETL D0Re0,D1Re0,[D0Ar2++]
+       GETL D0Ar6,D1Ar5,[D0Ar2++]
+       SETL [D1Ar1++],D0Re0,D1Re0
+       SETL [D1Ar1++],D0Ar6,D1Ar5
+       SUBS D0FrT,D0FrT,#16
+       BNZ  $Lcopy_page_loop
+       MOV  PC,D1RtP
+        .size  _copy_page,.-_copy_page
diff --git a/arch/metag/lib/delay.c b/arch/metag/lib/delay.c
new file mode 100644 (file)
index 0000000..0b308f4
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ *     Precise Delay Loops for Meta
+ *
+ *     Copyright (C) 1993 Linus Torvalds
+ *     Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *     Copyright (C) 2007,2009 Imagination Technologies Ltd.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+
+#include <asm/core_reg.h>
+#include <asm/processor.h>
+
+/*
+ * TXTACTCYC is only 24 bits, so on chips with fast clocks it will wrap
+ * many times per-second. If it does wrap __delay will return prematurely,
+ * but this is only likely with large delay values.
+ *
+ * We also can't implement read_current_timer() with TXTACTCYC due to
+ * this wrapping behaviour.
+ */
+#define rdtimer(t) t = __core_reg_get(TXTACTCYC)
+
+void __delay(unsigned long loops)
+{
+       unsigned long bclock, now;
+
+       rdtimer(bclock);
+       do {
+               asm("NOP");
+               rdtimer(now);
+       } while ((now-bclock) < loops);
+}
+EXPORT_SYMBOL(__delay);
+
+inline void __const_udelay(unsigned long xloops)
+{
+       u64 loops = (u64)xloops * (u64)loops_per_jiffy * HZ;
+       __delay(loops >> 32);
+}
+EXPORT_SYMBOL(__const_udelay);
+
+void __udelay(unsigned long usecs)
+{
+       __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long nsecs)
+{
+       __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */
+}
+EXPORT_SYMBOL(__ndelay);
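The magic constants above are ceil(2^32 / 10^6) and ceil(2^32 / 10^9), so that after the 64-bit multiply by loops_per_jiffy * HZ (delay-loop iterations per second) and the shift right by 32, the result is the number of loop iterations for the requested time. A worked sketch, assuming 400,000,000 iterations per second (an illustrative figure, not a real board value):

/* Sketch of the __udelay() scaling with an assumed loop rate. */
static unsigned long udelay_loops_sketch(unsigned long usecs)
{
	unsigned long xloops = usecs * 0x000010c7UL;			/* ceil(2^32 / 10^6) */
	unsigned long long loops = (unsigned long long)xloops * 400000000ULL;

	return (unsigned long)(loops >> 32);				/* ~4000 loops for usecs == 10 */
}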
diff --git a/arch/metag/lib/div64.S b/arch/metag/lib/div64.S
new file mode 100644 (file)
index 0000000..1cfc934
--- /dev/null
@@ -0,0 +1,108 @@
+! Copyright (C) 2012 Imagination Technologies Ltd.
+!
+! Signed/unsigned 64-bit division routines.
+!
+
+       .text
+       .global _div_u64
+       .type   _div_u64,function
+
+_div_u64:
+$L1:
+       ORS     A0.3,D1Ar3,D0Ar4
+       BNE     $L3
+$L2:
+       MOV     D0Re0,D0Ar2
+       MOV     D1Re0,D1Ar1
+       MOV     PC,D1RtP
+$L3:
+       CMP     D1Ar3,D1Ar1
+       CMPEQ   D0Ar4,D0Ar2
+       MOV     D0Re0,#1
+       MOV     D1Re0,#0
+       BHS     $L6
+$L4:
+       ADDS    D0Ar6,D0Ar4,D0Ar4
+       ADD     D1Ar5,D1Ar3,D1Ar3
+       ADDCS   D1Ar5,D1Ar5,#1
+       CMP     D1Ar5,D1Ar3
+       CMPEQ   D0Ar6,D0Ar4
+       BLO     $L6
+$L5:
+       MOV     D0Ar4,D0Ar6
+       MOV     D1Ar3,D1Ar5
+       ADDS    D0Re0,D0Re0,D0Re0
+       ADD     D1Re0,D1Re0,D1Re0
+       ADDCS   D1Re0,D1Re0,#1
+       CMP     D1Ar3,D1Ar1
+       CMPEQ   D0Ar4,D0Ar2
+       BLO     $L4
+$L6:
+       ORS     A0.3,D1Re0,D0Re0
+       MOV     D0Ar6,#0
+       MOV     D1Ar5,D0Ar6
+       BEQ     $L10
+$L7:
+       CMP     D1Ar1,D1Ar3
+       CMPEQ   D0Ar2,D0Ar4
+       BLO     $L9
+$L8:
+       ADDS    D0Ar6,D0Ar6,D0Re0
+       ADD     D1Ar5,D1Ar5,D1Re0
+       ADDCS   D1Ar5,D1Ar5,#1
+
+       SUBS    D0Ar2,D0Ar2,D0Ar4
+       SUB     D1Ar1,D1Ar1,D1Ar3
+       SUBCS   D1Ar1,D1Ar1,#1
+$L9:
+       LSL     A0.3,D1Re0,#31
+       LSR     D0Re0,D0Re0,#1
+       LSR     D1Re0,D1Re0,#1
+       OR      D0Re0,D0Re0,A0.3
+       LSL     A0.3,D1Ar3,#31
+       LSR     D0Ar4,D0Ar4,#1
+       LSR     D1Ar3,D1Ar3,#1
+       OR      D0Ar4,D0Ar4,A0.3
+       ORS     A0.3,D1Re0,D0Re0
+       BNE     $L7
+$L10:
+       MOV     D0Re0,D0Ar6
+       MOV     D1Re0,D1Ar5
+       MOV     PC,D1RtP
+       .size _div_u64,.-_div_u64
+
+       .text
+       .global _div_s64
+       .type   _div_s64,function
+_div_s64:
+       MSETL   [A0StP],D0FrT,D0.5
+       XOR     D0.5,D0Ar2,D0Ar4
+       XOR     D1.5,D1Ar1,D1Ar3
+       TSTT    D1Ar1,#HI(0x80000000)
+       BZ      $L25
+
+       NEGS    D0Ar2,D0Ar2
+       NEG     D1Ar1,D1Ar1
+       SUBCS   D1Ar1,D1Ar1,#1
+$L25:
+       TSTT    D1Ar3,#HI(0x80000000)
+       BZ      $L27
+
+       NEGS    D0Ar4,D0Ar4
+       NEG     D1Ar3,D1Ar3
+       SUBCS   D1Ar3,D1Ar3,#1
+$L27:
+       CALLR   D1RtP,_div_u64
+       TSTT    D1.5,#HI(0x80000000)
+       BZ      $L29
+
+       NEGS    D0Re0,D0Re0
+       NEG     D1Re0,D1Re0
+       SUBCS   D1Re0,D1Re0,#1
+$L29:
+
+       GETL    D0FrT,D1RtP,[A0StP+#(-16)]
+       GETL    D0.5,D1.5,[A0StP+#(-8)]
+       SUB     A0StP,A0StP,#16
+       MOV     PC,D1RtP
+       .size _div_s64,.-_div_s64
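_div_s64 reduces signed division to the unsigned routine: it records whether the operand signs differ, divides the magnitudes, and negates the quotient if needed. The same idea in C (a sketch; the plain '/' stands in for _div_u64):

/* Rough C equivalent of the sign handling in _div_s64 above. */
long long div_s64_sketch(long long a, long long b)
{
	int negate = (a < 0) ^ (b < 0);
	unsigned long long ua = a < 0 ? 0ULL - (unsigned long long)a : (unsigned long long)a;
	unsigned long long ub = b < 0 ? 0ULL - (unsigned long long)b : (unsigned long long)b;
	unsigned long long q = ua / ub;		/* stands in for _div_u64 */

	return negate ? -(long long)q : (long long)q;
}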
diff --git a/arch/metag/lib/divsi3.S b/arch/metag/lib/divsi3.S
new file mode 100644 (file)
index 0000000..7c8a8ae
--- /dev/null
@@ -0,0 +1,100 @@
+! Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+!               Imagination Technologies Ltd
+!
+! Integer divide routines.
+!
+
+       .text
+       .global ___udivsi3
+       .type   ___udivsi3,function
+       .align  2
+___udivsi3:
+!!
+!! The shared core below handles the signed divide case, so just set the control variable
+!!
+       MOV     D1Re0,D0Ar2             ! Au already in A1Ar1, Bu -> D1Re0
+       MOV     D0Re0,#0                ! Result is 0
+       MOV     D0Ar4,#0                ! Return positive result
+       B       $LIDMCUStart
+       .size   ___udivsi3,.-___udivsi3
+
+!!
+!! 32-bit division signed i/p - passed signed 32-bit numbers
+!!
+       .global ___divsi3
+       .type   ___divsi3,function
+       .align  2
+___divsi3:
+!!
+!! A already in D1Ar1, B already in D0Ar2 -> make B abs(B)
+!!
+       MOV     D1Re0,D0Ar2             ! A already in A1Ar1, B -> D1Re0
+       MOV     D0Re0,#0                ! Result is 0
+       XOR     D0Ar4,D1Ar1,D1Re0       ! D0Ar4 -ive if result is -ive
+       ABS     D1Ar1,D1Ar1             ! abs(A) -> Au
+       ABS     D1Re0,D1Re0             ! abs(B) -> Bu
+$LIDMCUStart:
+       CMP     D1Ar1,D1Re0             ! Is ( Au > Bu )?
+       LSR     D1Ar3,D1Ar1,#2          ! Calculate (Au & (~3)) >> 2
+       CMPHI   D1Re0,D1Ar3             ! OR ( (Au & (~3)) <= (Bu << 2) )?
+       LSLSHI  D1Ar3,D1Re0,#1          ! Buq = Bu << 1
+       BLS     $LIDMCUSetup            ! Yes: Do normal divide
+!!
+!! Quick divide setup can assume that CurBit only needs to start at 2
+!!
+$LIDMCQuick:
+       CMP     D1Ar1,D1Ar3             ! ( A >= Buq )?
+       ADDCC   D0Re0,D0Re0,#2          ! If yes result += 2
+       SUBCC   D1Ar1,D1Ar1,D1Ar3       !        and A -= Buq
+       CMP     D1Ar1,D1Re0             ! ( A >= Bu )?
+       ADDCC   D0Re0,D0Re0,#1          ! If yes result += 1
+       SUBCC   D1Ar1,D1Ar1,D1Re0       !        and A -= Bu
+       ORS     D0Ar4,D0Ar4,D0Ar4       ! Return neg result?
+       NEG     D0Ar2,D0Re0             ! Calculate neg result
+       MOVMI   D0Re0,D0Ar2             ! Yes: Take neg result
+$LIDMCRet:
+       MOV     PC,D1RtP
+!!
+!!  Setup for general unsigned divide code
+!!
+!!      D0Re0 is used to form the result, already set to Zero
+!!      D1Re0 is the input Bu value, this gets trashed
+!!      D0Ar6 is curbit which is set to 1 at the start and shifted up
+!!      D0Ar4 is negative if we should return a negative result
+!!      D1Ar1 is the input Au value, eventually this holds the remainder
+!!
+$LIDMCUSetup:
+       CMP     D1Ar1,D1Re0             ! Is ( Au < Bu )?
+       MOV     D0Ar6,#1                ! Set curbit to 1
+       BCS     $LIDMCRet               ! Yes: Return 0 remainder Au
+!!
+!! Calculate alignment using FFB instruction
+!!
+       FFB     D1Ar5,D1Ar1             ! Find first bit of Au
+       ANDN    D1Ar5,D1Ar5,#31         ! Handle exceptional case.
+       ORN     D1Ar5,D1Ar5,#31         ! if N bit set, set to 31
+       FFB     D1Ar3,D1Re0             ! Find first bit of Bu
+       ANDN    D1Ar3,D1Ar3,#31         ! Handle exceptional case.
+       ORN     D1Ar3,D1Ar3,#31         ! if N bit set, set to 31
+       SUBS    D1Ar3,D1Ar5,D1Ar3       ! calculate diff, ffbA - ffbB
+       MOV     D0Ar2,D1Ar3             ! copy into bank 0
+       LSLGT   D1Re0,D1Re0,D1Ar3       ! ( > 0) ? left shift B
+       LSLGT   D0Ar6,D0Ar6,D0Ar2       ! ( > 0) ? left shift curbit
+!!
+!! Now we start the divide proper, logic is
+!!
+!!       if ( A >= B ) add curbit to result and subtract B from A
+!!       shift curbit and B down by 1 in either case
+!!
+$LIDMCLoop:
+       CMP     D1Ar1, D1Re0            ! ( A >= B )?
+       ADDCC   D0Re0, D0Re0, D0Ar6     ! If yes result += curbit
+       SUBCC   D1Ar1, D1Ar1, D1Re0     ! and A -= B
+       LSRS    D0Ar6, D0Ar6, #1        ! Shift down curbit, is it zero?
+       LSR     D1Re0, D1Re0, #1        ! Shift down B
+       BNZ     $LIDMCLoop               ! Was single bit in curbit lost?
+       ORS     D0Ar4,D0Ar4,D0Ar4       ! Return neg result?
+       NEG     D0Ar2,D0Re0             ! Calculate neg result
+       MOVMI   D0Re0,D0Ar2             ! Yes: Take neg result
+       MOV     PC,D1RtP
+       .size   ___divsi3,.-___divsi3
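The comments above describe the classic shift-and-subtract loop: align the divisor (and curbit) with the dividend, then on each step subtract and accumulate curbit when possible, shifting both down by one. The same loop in C, as an illustration of the algorithm rather than of the register allocation:

/* Rough C equivalent of the general unsigned divide loop above. */
unsigned int udivsi3_sketch(unsigned int a, unsigned int b)
{
	unsigned int result = 0, curbit = 1;

	if (b == 0)			/* division by zero is undefined; return 0 here */
		return 0;
	if (a < b)			/* quotient 0, remainder a */
		return 0;
	/* align b (and curbit) with the top of a */
	while (b < a && !(b & 0x80000000)) {
		b <<= 1;
		curbit <<= 1;
	}
	while (curbit) {
		if (a >= b) {		/* add curbit to result and subtract b from a */
			result += curbit;
			a -= b;
		}
		curbit >>= 1;		/* shift curbit and b down by 1 in either case */
		b >>= 1;
	}
	return result;			/* 'a' now holds the remainder */
}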
diff --git a/arch/metag/lib/ip_fast_csum.S b/arch/metag/lib/ip_fast_csum.S
new file mode 100644 (file)
index 0000000..533b1e7
--- /dev/null
@@ -0,0 +1,32 @@
+
+       .text
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ *
+ * extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+ *
+ */
+       .global _ip_fast_csum
+       .type   _ip_fast_csum,function
+_ip_fast_csum:
+       !! TXRPT needs loops - 1
+       SUBS    TXRPT,D0Ar2,#1
+       MOV     D0Re0,#0
+       BLO     $Lfast_csum_exit
+$Lfast_csum_loop:
+       GETD    D1Ar3,[D1Ar1++]
+       ADDS    D0Re0,D0Re0,D1Ar3
+       ADDCS   D0Re0,D0Re0,#1
+       BR      $Lfast_csum_loop
+       LSR     D0Ar4,D0Re0,#16
+       AND     D0Re0,D0Re0,#0xffff
+       AND     D0Ar4,D0Ar4,#0xffff
+       ADD     D0Re0,D0Re0,D0Ar4
+       LSR     D0Ar4,D0Re0,#16
+       ADD     D0Re0,D0Re0,D0Ar4
+       XOR     D0Re0,D0Re0,#-1
+       AND     D0Re0,D0Re0,#0xffff
+$Lfast_csum_exit:
+       MOV     PC,D1RtP
+       .size _ip_fast_csum,.-_ip_fast_csum
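What the loop above computes, expressed in C: sum the ihl 32-bit words of the header with end-around carry, fold the sum to 16 bits, and return its one's complement. A sketch (not the in-kernel prototype, which takes const void *):

/* C sketch of the IP header checksum above. */
unsigned short ip_fast_csum_sketch(const unsigned int *iph, unsigned int ihl)
{
	unsigned long long sum = 0;
	unsigned int i;

	for (i = 0; i < ihl; i++)
		sum += iph[i];				/* sum the 32-bit header words */
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);	/* fold carries back in */
	return (unsigned short)~sum;			/* one's complement result */
}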
diff --git a/arch/metag/lib/lshrdi3.S b/arch/metag/lib/lshrdi3.S
new file mode 100644 (file)
index 0000000..47f7202
--- /dev/null
@@ -0,0 +1,33 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit logical shift right routine.
+!
+
+       .text
+       .global ___lshrdi3
+       .type   ___lshrdi3,function
+
+___lshrdi3:
+       MOV     D0Re0,D0Ar2
+       MOV     D1Re0,D1Ar1
+       CMP     D1Ar3,#0                ! COUNT == 0
+       MOVEQ   PC,D1RtP                ! Yes, return
+
+       MOV     D0Ar4,D1Ar3
+       SUBS    D1Ar3,D1Ar3,#32         ! N = COUNT - 32
+       BGE     $L30
+
+!! Shift < 32
+       NEG     D1Ar3,D1Ar3             ! N = - N
+       LSR     D0Re0,D0Re0,D0Ar4       ! LO = LO >> COUNT
+       LSL     D0Ar6,D1Re0,D1Ar3       ! TMP= HI << -(COUNT - 32)
+       OR      D0Re0,D0Re0,D0Ar6       ! LO = LO | TMP
+       SWAP    D1Ar3,D0Ar4
+       LSR     D1Re0,D1Re0,D1Ar3       ! HI = HI >> COUNT
+       MOV     PC,D1RtP
+$L30:
+!! Shift >= 32
+       LSR     D0Re0,D1Re0,D1Ar3       ! LO = HI >> N
+       MOV     D1Re0,#0                ! HI = 0
+       MOV     PC,D1RtP
+       .size ___lshrdi3,.-___lshrdi3
diff --git a/arch/metag/lib/memcpy.S b/arch/metag/lib/memcpy.S
new file mode 100644 (file)
index 0000000..46b7a2b
--- /dev/null
@@ -0,0 +1,185 @@
+!   Copyright (C) 2008-2012 Imagination Technologies Ltd.
+
+       .text
+       .global _memcpy
+       .type   _memcpy,function
+! D1Ar1 dst
+! D0Ar2 src
+! D1Ar3 cnt
+! D0Re0 dst
+_memcpy:
+       CMP     D1Ar3, #16
+       MOV     A1.2, D0Ar2             ! source pointer
+       MOV     A0.2, D1Ar1             ! destination pointer
+       MOV     A0.3, D1Ar1             ! for return value
+! If there are less than 16 bytes to copy use the byte copy loop
+       BGE     $Llong_copy
+
+$Lbyte_copy:
+! Simply copy a byte at a time
+       SUBS    TXRPT, D1Ar3, #1
+       BLT     $Lend
+$Lloop_byte:
+       GETB    D1Re0, [A1.2++]
+       SETB    [A0.2++], D1Re0
+       BR      $Lloop_byte
+
+$Lend:
+! Finally set return value and return
+       MOV     D0Re0, A0.3
+       MOV     PC, D1RtP
+
+$Llong_copy:
+       ANDS    D1Ar5, D1Ar1, #7        ! test destination alignment
+       BZ      $Laligned_dst
+
+! The destination address is not 8 byte aligned. We will copy bytes from
+! the source to the destination until the remaining data has an 8 byte
+! destination address alignment (i.e we should never copy more than 7
+! bytes here).
+$Lalign_dst:
+       GETB    D0Re0, [A1.2++]
+       ADD     D1Ar5, D1Ar5, #1        ! dest is aligned when D1Ar5 reaches #8
+       SUB     D1Ar3, D1Ar3, #1        ! decrement count of remaining bytes
+       SETB    [A0.2++], D0Re0
+       CMP     D1Ar5, #8
+       BNE     $Lalign_dst
+
+! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
+! blocks, then jump to the unaligned copy loop or fall through to the aligned
+! copy loop as appropriate.
+$Laligned_dst:
+       MOV     D0Ar4, A1.2
+       LSR     D1Ar5, D1Ar3, #3        ! D1Ar5 = number of 8 byte blocks
+       ANDS    D0Ar4, D0Ar4, #7        ! test source alignment
+       BNZ     $Lunaligned_copy        ! if unaligned, use unaligned copy loop
+
+! Both source and destination are 8 byte aligned - the easy case.
+$Laligned_copy:
+       LSRS    D1Ar5, D1Ar3, #5        ! D1Ar5 = number of 32 byte blocks
+       BZ      $Lbyte_copy
+       SUB     TXRPT, D1Ar5, #1
+
+$Laligned_32:
+       GETL    D0Re0, D1Re0, [A1.2++]
+       GETL    D0Ar6, D1Ar5, [A1.2++]
+       SETL    [A0.2++], D0Re0, D1Re0
+       SETL    [A0.2++], D0Ar6, D1Ar5
+       GETL    D0Re0, D1Re0, [A1.2++]
+       GETL    D0Ar6, D1Ar5, [A1.2++]
+       SETL    [A0.2++], D0Re0, D1Re0
+       SETL    [A0.2++], D0Ar6, D1Ar5
+       BR      $Laligned_32
+
+! If there are any remaining bytes use the byte copy loop, otherwise we are done
+       ANDS    D1Ar3, D1Ar3, #0x1f
+       BNZ     $Lbyte_copy
+       B       $Lend
+
+! The destination is 8 byte aligned but the source is not, and there are 8
+! or more bytes to be copied.
+$Lunaligned_copy:
+! Adjust the source pointer (A1.2) to the 8 byte boundary before its
+! current value
+       MOV     D0Ar4, A1.2
+       MOV     D0Ar6, A1.2
+       ANDMB   D0Ar4, D0Ar4, #0xfff8
+       MOV     A1.2, D0Ar4
+! Save the number of bytes of mis-alignment in D0Ar4 for use later
+       SUBS    D0Ar6, D0Ar6, D0Ar4
+       MOV     D0Ar4, D0Ar6
+! if there is no mis-alignment after all, use the aligned copy loop
+       BZ      $Laligned_copy
+
+! prefetch 8 bytes
+       GETL    D0Re0, D1Re0, [A1.2]
+
+       SUB     TXRPT, D1Ar5, #1
+
+! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
+! 4 bytes, and more than 4 bytes.
+       CMP     D0Ar6, #4
+       BLT     $Lunaligned_1_2_3       ! use 1-3 byte mis-alignment loop
+       BZ      $Lunaligned_4           ! use 4 byte mis-alignment loop
+
+! The mis-alignment is more than 4 bytes
+$Lunaligned_5_6_7:
+       SUB     D0Ar6, D0Ar6, #4
+! Calculate the bit offsets required for the shift operations necessary
+! to align the data.
+! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
+       MULW    D0Ar6, D0Ar6, #8
+       MOV     D1Ar5, #32
+       SUB     D1Ar5, D1Ar5, D0Ar6
+! Move data 4 bytes before we enter the main loop
+       MOV     D0Re0, D1Re0
+
+$Lloop_5_6_7:
+       GETL    D0Ar2, D1Ar1, [++A1.2]
+! form 64-bit data in D0Re0, D1Re0
+       LSR     D0Re0, D0Re0, D0Ar6
+       MOV     D1Re0, D0Ar2
+       LSL     D1Re0, D1Re0, D1Ar5
+       ADD     D0Re0, D0Re0, D1Re0
+
+       LSR     D0Ar2, D0Ar2, D0Ar6
+       LSL     D1Re0, D1Ar1, D1Ar5
+       ADD     D1Re0, D1Re0, D0Ar2
+
+       SETL    [A0.2++], D0Re0, D1Re0
+       MOV     D0Re0, D1Ar1
+       BR      $Lloop_5_6_7
+
+       B       $Lunaligned_end
+
+$Lunaligned_1_2_3:
+! Calculate the bit offsets required for the shift operations necessary
+! to align the data.
+! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
+       MULW    D0Ar6, D0Ar6, #8
+       MOV     D1Ar5, #32
+       SUB     D1Ar5, D1Ar5, D0Ar6
+
+$Lloop_1_2_3:
+! form 64-bit data in D0Re0,D1Re0
+       LSR     D0Re0, D0Re0, D0Ar6
+       LSL     D1Ar1, D1Re0, D1Ar5
+       ADD     D0Re0, D0Re0, D1Ar1
+       MOV     D0Ar2, D1Re0
+       LSR     D0FrT, D0Ar2, D0Ar6
+       GETL    D0Ar2, D1Ar1, [++A1.2]
+
+       MOV     D1Re0, D0Ar2
+       LSL     D1Re0, D1Re0, D1Ar5
+       ADD     D1Re0, D1Re0, D0FrT
+
+       SETL    [A0.2++], D0Re0, D1Re0
+       MOV     D0Re0, D0Ar2
+       MOV     D1Re0, D1Ar1
+       BR      $Lloop_1_2_3
+
+       B       $Lunaligned_end
+
+! The 4 byte mis-alignment case - this does not require any shifting, just a
+! shuffling of registers.
+$Lunaligned_4:
+       MOV     D0Re0, D1Re0
+$Lloop_4:
+       GETL    D0Ar2, D1Ar1, [++A1.2]
+       MOV     D1Re0, D0Ar2
+       SETL    [A0.2++], D0Re0, D1Re0
+       MOV     D0Re0, D1Ar1
+       BR      $Lloop_4
+
+$Lunaligned_end:
+! If there are no remaining bytes to copy, we are done.
+       ANDS    D1Ar3, D1Ar3, #7
+       BZ      $Lend
+! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
+! address of the remaining bytes, and fall through to the byte copy loop.
+       MOV     D0Ar6, A1.2
+       ADD     D1Ar5, D0Ar4, D0Ar6
+       MOV     A1.2, D1Ar5
+       B       $Lbyte_copy
+
+       .size _memcpy,.-_memcpy
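The unaligned cases above read aligned words from the source and splice adjacent words together with a pair of shifts, so the destination can still be written with aligned stores. A simplified little-endian C sketch of that splicing for a word-granular copy (illustrative only; it assumes a non-zero byte offset and reads from the rounded-down source address just as the assembly does):

/* Sketch of the shift-and-combine technique used for unaligned sources. */
static void unaligned_copy_sketch(unsigned int *dst, const unsigned char *src,
				  unsigned int nwords)
{
	unsigned int off = (unsigned long)src & 3;	/* assumed to be 1..3 here */
	unsigned int shift = off * 8;
	const unsigned int *s = (const unsigned int *)(src - off);
	unsigned int prev = *s++;			/* "prefetch" the first aligned word */
	unsigned int i;

	for (i = 0; i < nwords; i++) {
		unsigned int next = *s++;
		/* little-endian: low bytes from prev, high bytes from next */
		dst[i] = (prev >> shift) | (next << (32 - shift));
		prev = next;
	}
}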
diff --git a/arch/metag/lib/memmove.S b/arch/metag/lib/memmove.S
new file mode 100644 (file)
index 0000000..228ea04
--- /dev/null
@@ -0,0 +1,345 @@
+!   Copyright (C) 2008-2012 Imagination Technologies Ltd.
+
+       .text
+       .global _memmove
+       .type   _memmove,function
+! D1Ar1 dst
+! D0Ar2 src
+! D1Ar3 cnt
+! D0Re0 dst
+_memmove:
+       CMP     D1Ar3, #0
+       MOV     D0Re0, D1Ar1
+       BZ      $LEND2
+       MSETL   [A0StP], D0.5, D0.6, D0.7
+       MOV     D1Ar5, D0Ar2
+       CMP     D1Ar1, D1Ar5
+       BLT     $Lforwards_copy
+       SUB     D0Ar4, D1Ar1, D1Ar3
+       ADD     D0Ar4, D0Ar4, #1
+       CMP     D0Ar2, D0Ar4
+       BLT     $Lforwards_copy
+       ! should copy backwards
+       MOV     D1Re0, D0Ar2
+       ! adjust pointer to the end of mem
+       ADD     D0Ar2, D1Re0, D1Ar3
+       ADD     D1Ar1, D1Ar1, D1Ar3
+
+       MOV     A1.2, D0Ar2
+       MOV     A0.2, D1Ar1
+       CMP     D1Ar3, #8
+       BLT     $Lbbyte_loop
+
+       MOV     D0Ar4, D0Ar2
+       MOV     D1Ar5, D1Ar1
+
+       ! test 8 byte alignment
+       ANDS    D1Ar5, D1Ar5, #7
+       BNE     $Lbdest_unaligned
+
+       ANDS    D0Ar4, D0Ar4, #7
+       BNE     $Lbsrc_unaligned
+
+       LSR     D1Ar5, D1Ar3, #3
+
+$Lbaligned_loop:
+       GETL    D0Re0, D1Re0, [--A1.2]
+       SETL    [--A0.2], D0Re0, D1Re0
+       SUBS    D1Ar5, D1Ar5, #1
+       BNE     $Lbaligned_loop
+
+       ANDS    D1Ar3, D1Ar3, #7
+       BZ      $Lbbyte_loop_exit
+$Lbbyte_loop:
+       GETB    D1Re0, [--A1.2]
+       SETB    [--A0.2], D1Re0
+       SUBS    D1Ar3, D1Ar3, #1
+       BNE     $Lbbyte_loop
+$Lbbyte_loop_exit:
+       MOV     D0Re0, A0.2
+$LEND:
+       SUB     A0.2, A0StP, #24
+       MGETL   D0.5, D0.6, D0.7, [A0.2]
+       SUB     A0StP, A0StP, #24
+$LEND2:
+       MOV     PC, D1RtP
+
+$Lbdest_unaligned:
+       GETB    D0Re0, [--A1.2]
+       SETB    [--A0.2], D0Re0
+       SUBS    D1Ar5, D1Ar5, #1
+       SUB     D1Ar3, D1Ar3, #1
+       BNE     $Lbdest_unaligned
+       CMP     D1Ar3, #8
+       BLT     $Lbbyte_loop
+$Lbsrc_unaligned:
+       LSR     D1Ar5, D1Ar3, #3
+       ! adjust A1.2
+       MOV     D0Ar4, A1.2
+       ! save original address
+       MOV     D0Ar6, A1.2
+
+       ADD     D0Ar4, D0Ar4, #7
+       ANDMB   D0Ar4, D0Ar4, #0xfff8
+       ! new address is the 8-byte aligned one above the original
+       MOV     A1.2, D0Ar4
+
+       ! A0.2 dst 64-bit is aligned
+       ! measure the gap size
+       SUB     D0Ar6, D0Ar4, D0Ar6
+       MOVS    D0Ar4, D0Ar6
+       ! keep this information for the later adjustment
+       ! both aligned
+       BZ      $Lbaligned_loop
+
+       ! prefetch
+       GETL    D0Re0, D1Re0, [--A1.2]
+
+       CMP     D0Ar6, #4
+       BLT     $Lbunaligned_1_2_3
+       ! 32-bit aligned
+       BZ      $Lbaligned_4
+
+       SUB     D0Ar6, D0Ar6, #4
+       ! D1.6 stores the gap size in bits
+       MULW    D1.6, D0Ar6, #8
+       MOV     D0.6, #32
+       ! D0.6 stores the complement of the gap size
+       SUB     D0.6, D0.6, D1.6
+
+$Lbunaligned_5_6_7:
+       GETL    D0.7, D1.7, [--A1.2]
+       ! form 64-bit data in D0Re0, D1Re0
+       MOV     D1Re0, D0Re0
+       ! D1Re0 << gap-size
+       LSL     D1Re0, D1Re0, D1.6
+       MOV     D0Re0, D1.7
+       ! D0Re0 >> complement
+       LSR     D0Re0, D0Re0, D0.6
+       MOV     D1.5, D0Re0
+       ! combine the two halves
+       ADD     D1Re0, D1Re0, D1.5
+
+       MOV     D1.5, D1.7
+       LSL     D1.5, D1.5, D1.6
+       MOV     D0Re0, D0.7
+       LSR     D0Re0, D0Re0, D0.6
+       MOV     D0.5, D1.5
+       ADD     D0Re0, D0Re0, D0.5
+
+       SETL    [--A0.2], D0Re0, D1Re0
+       MOV     D0Re0, D0.7
+       MOV     D1Re0, D1.7
+       SUBS    D1Ar5, D1Ar5, #1
+       BNE     $Lbunaligned_5_6_7
+
+       ANDS    D1Ar3, D1Ar3, #7
+       BZ      $Lbbyte_loop_exit
+       ! Adjust A1.2
+       ! A1.2 <- A1.2 +8 - gapsize
+       ADD     A1.2, A1.2, #8
+       SUB     A1.2, A1.2, D0Ar4
+       B       $Lbbyte_loop
+
+$Lbunaligned_1_2_3:
+       MULW    D1.6, D0Ar6, #8
+       MOV     D0.6, #32
+       SUB     D0.6, D0.6, D1.6
+
+$Lbunaligned_1_2_3_loop:
+       GETL    D0.7, D1.7, [--A1.2]
+       ! form 64-bit data in D0Re0, D1Re0
+       LSL     D1Re0, D1Re0, D1.6
+       ! save D0Re0 for later use
+       MOV     D0.5, D0Re0
+       LSR     D0Re0, D0Re0, D0.6
+       MOV     D1.5, D0Re0
+       ADD     D1Re0, D1Re0, D1.5
+
+       ! original data in D0Re0
+       MOV     D1.5, D0.5
+       LSL     D1.5, D1.5, D1.6
+       MOV     D0Re0, D1.7
+       LSR     D0Re0, D0Re0, D0.6
+       MOV     D0.5, D1.5
+       ADD     D0Re0, D0Re0, D0.5
+
+       SETL    [--A0.2], D0Re0, D1Re0
+       MOV     D0Re0, D0.7
+       MOV     D1Re0, D1.7
+       SUBS    D1Ar5, D1Ar5, #1
+       BNE     $Lbunaligned_1_2_3_loop
+
+       ANDS    D1Ar3, D1Ar3, #7
+       BZ      $Lbbyte_loop_exit
+       ! Adjust A1.2
+       ADD     A1.2, A1.2, #8
+       SUB     A1.2, A1.2, D0Ar4
+       B       $Lbbyte_loop
+
+$Lbaligned_4:
+       GETL    D0.7, D1.7, [--A1.2]
+       MOV     D1Re0, D0Re0
+       MOV     D0Re0, D1.7
+       SETL    [--A0.2], D0Re0, D1Re0
+       MOV     D0Re0, D0.7
+       MOV     D1Re0, D1.7
+       SUBS    D1Ar5, D1Ar5, #1
+       BNE     $Lbaligned_4
+       ANDS    D1Ar3, D1Ar3, #7
+       BZ      $Lbbyte_loop_exit
+       ! Adjust A1.2
+       ADD     A1.2, A1.2, #8
+       SUB     A1.2, A1.2, D0Ar4
+       B       $Lbbyte_loop
+
+$Lforwards_copy:
+       MOV     A1.2, D0Ar2
+       MOV     A0.2, D1Ar1
+       CMP     D1Ar3, #8
+       BLT     $Lfbyte_loop
+
+       MOV     D0Ar4, D0Ar2
+       MOV     D1Ar5, D1Ar1
+
+       ANDS    D1Ar5, D1Ar5, #7
+       BNE     $Lfdest_unaligned
+
+       ANDS    D0Ar4, D0Ar4, #7
+       BNE     $Lfsrc_unaligned
+
+       LSR     D1Ar5, D1Ar3, #3
+
+$Lfaligned_loop:
+       GETL    D0Re0, D1Re0, [A1.2++]
+       SUBS    D1Ar5, D1Ar5, #1
+       SETL    [A0.2++], D0Re0, D1Re0
+       BNE     $Lfaligned_loop
+
+       ANDS    D1Ar3, D1Ar3, #7
+       BZ      $Lfbyte_loop_exit
+$Lfbyte_loop:
+       GETB    D1Re0, [A1.2++]
+       SETB    [A0.2++], D1Re0
+       SUBS    D1Ar3, D1Ar3, #1
+       BNE     $Lfbyte_loop
+$Lfbyte_loop_exit:
+       MOV     D0Re0, D1Ar1
+       B       $LEND
+
+$Lfdest_unaligned:
+       GETB    D0Re0, [A1.2++]
+       ADD     D1Ar5, D1Ar5, #1
+       SUB     D1Ar3, D1Ar3, #1
+       SETB    [A0.2++], D0Re0
+       CMP     D1Ar5, #8
+       BNE     $Lfdest_unaligned
+       CMP     D1Ar3, #8
+       BLT     $Lfbyte_loop
+$Lfsrc_unaligned:
+       ! adjust A1.2
+       LSR     D1Ar5, D1Ar3, #3
+
+       MOV     D0Ar4, A1.2
+       MOV     D0Ar6, A1.2
+       ANDMB   D0Ar4, D0Ar4, #0xfff8
+       MOV     A1.2, D0Ar4
+
+       ! A0.2 dst is 64-bit aligned
+       SUB     D0Ar6, D0Ar6, D0Ar4
+       ! keep the information for the later adjustment
+       MOVS    D0Ar4, D0Ar6
+
+       ! both aligned
+       BZ      $Lfaligned_loop
+
+       ! prefetch
+       GETL    D0Re0, D1Re0, [A1.2]
+
+       CMP     D0Ar6, #4
+       BLT     $Lfunaligned_1_2_3
+       BZ      $Lfaligned_4
+
+       SUB     D0Ar6, D0Ar6, #4
+       MULW    D0.6, D0Ar6, #8
+       MOV     D1.6, #32
+       SUB     D1.6, D1.6, D0.6
+
+$Lfunaligned_5_6_7:
+       GETL    D0.7, D1.7, [++A1.2]
+       ! form 64-bit data in D0Re0, D1Re0
+       MOV     D0Re0, D1Re0
+       LSR     D0Re0, D0Re0, D0.6
+       MOV     D1Re0, D0.7
+       LSL     D1Re0, D1Re0, D1.6
+       MOV     D0.5, D1Re0
+       ADD     D0Re0, D0Re0, D0.5
+
+       MOV     D0.5, D0.7
+       LSR     D0.5, D0.5, D0.6
+       MOV     D1Re0, D1.7
+       LSL     D1Re0, D1Re0, D1.6
+       MOV     D1.5, D0.5
+       ADD     D1Re0, D1Re0, D1.5
+
+       SETL    [A0.2++], D0Re0, D1Re0
+       MOV     D0Re0, D0.7
+       MOV     D1Re0, D1.7
+       SUBS    D1Ar5, D1Ar5, #1
+       BNE     $Lfunaligned_5_6_7
+
+       ANDS    D1Ar3, D1Ar3, #7
+       BZ      $Lfbyte_loop_exit
+       ! Adjust A1.2
+       ADD     A1.2, A1.2, D0Ar4
+       B       $Lfbyte_loop
+
+$Lfunaligned_1_2_3:
+       MULW    D0.6, D0Ar6, #8
+       MOV     D1.6, #32
+       SUB     D1.6, D1.6, D0.6
+
+$Lfunaligned_1_2_3_loop:
+       GETL    D0.7, D1.7, [++A1.2]
+       ! form 64-bit data in D0Re0, D1Re0
+       LSR     D0Re0, D0Re0, D0.6
+       MOV     D1.5, D1Re0
+       LSL     D1Re0, D1Re0, D1.6
+       MOV     D0.5, D1Re0
+       ADD     D0Re0, D0Re0, D0.5
+
+       MOV     D0.5, D1.5
+       LSR     D0.5, D0.5, D0.6
+       MOV     D1Re0, D0.7
+       LSL     D1Re0, D1Re0, D1.6
+       MOV     D1.5, D0.5
+       ADD     D1Re0, D1Re0, D1.5
+
+       SETL    [A0.2++], D0Re0, D1Re0
+       MOV     D0Re0, D0.7
+       MOV     D1Re0, D1.7
+       SUBS    D1Ar5, D1Ar5, #1
+       BNE     $Lfunaligned_1_2_3_loop
+
+       ANDS    D1Ar3, D1Ar3, #7
+       BZ      $Lfbyte_loop_exit
+       ! Adjust A1.2
+       ADD     A1.2, A1.2, D0Ar4
+       B       $Lfbyte_loop
+
+$Lfaligned_4:
+       GETL    D0.7, D1.7, [++A1.2]
+       MOV     D0Re0, D1Re0
+       MOV     D1Re0, D0.7
+       SETL    [A0.2++], D0Re0, D1Re0
+       MOV     D0Re0, D0.7
+       MOV     D1Re0, D1.7
+       SUBS    D1Ar5, D1Ar5, #1
+       BNE     $Lfaligned_4
+       ANDS    D1Ar3, D1Ar3, #7
+       BZ      $Lfbyte_loop_exit
+       ! Adjust A1.2
+       ADD     A1.2, A1.2, D0Ar4
+       B       $Lfbyte_loop
+
+       .size _memmove,.-_memmove
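The unaligned-source loops above read aligned 64-bit chunks and splice neighbouring chunks together with shifts sized by the source/destination gap. A minimal C sketch of that shift-and-merge idea (editorial illustration, not part of the patch; little-endian byte order and a gap of 1-3 bytes assumed):

    #include <stdint.h>

    /* Combine two aligned words into the unaligned word that starts
     * 'gap' bytes into 'lo'; the loops above do the same with 64-bit
     * chunks, using a precomputed gap/complement pair of shift counts. */
    static uint32_t merge_unaligned(uint32_t lo, uint32_t hi, unsigned int gap)
    {
            unsigned int shift = gap * 8;   /* gap in bits, 8..24 */

            return (lo >> shift) | (hi << (32 - shift));
    }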
diff --git a/arch/metag/lib/memset.S b/arch/metag/lib/memset.S
new file mode 100644 (file)
index 0000000..721085b
--- /dev/null
@@ -0,0 +1,86 @@
+!   Copyright (C) 2008-2012 Imagination Technologies Ltd.
+
+       .text
+       .global _memset
+       .type   _memset,function
+! D1Ar1 dst
+! D0Ar2 c
+! D1Ar3 cnt
+! D0Re0 dst
+_memset:
+       AND     D0Ar2,D0Ar2,#0xFF       ! Ensure a byte input value
+       MULW    D0Ar2,D0Ar2,#0x0101     ! Duplicate byte value into bits 0-15
+       ANDS    D0Ar4,D1Ar1,#7          ! Extract bottom LSBs of dst
+       LSL     D0Re0,D0Ar2,#16         ! Duplicate byte value into bits 16-31
+       ADD     A0.2,D0Ar2,D0Re0        ! Duplicate byte value into all 4 bytes (A0.2)
+       MOV     D0Re0,D1Ar1             ! Return dst
+       BZ      $LLongStub              ! if start address is aligned
+       ! start address is not aligned on an 8 byte boundary, so we
+       ! need the number of bytes up to the next 8 byte address
+       ! boundary, or the total count if less than 8, in D1Ar5
+       MOV     D0Ar2,#8                ! Need 8 - N in D1Ar5 ...
+       SUB     D1Ar5,D0Ar2,D0Ar4       !            ... subtract N
+       CMP     D1Ar3,D1Ar5
+       MOVMI   D1Ar5,D1Ar3
+       B       $LByteStub              ! dst is mis-aligned, do $LByteStub
+
+!
+! Preamble to LongLoop which generates 4*8 bytes per iteration (5 cycles)
+!
+$LLongStub:
+       LSRS    D0Ar2,D1Ar3,#5
+       AND     D1Ar3,D1Ar3,#0x1F
+       MOV     A1.2,A0.2
+       BEQ     $LLongishStub
+       SUB     TXRPT,D0Ar2,#1
+       CMP     D1Ar3,#0
+$LLongLoop:
+       SETL    [D1Ar1++],A0.2,A1.2
+       SETL    [D1Ar1++],A0.2,A1.2
+       SETL    [D1Ar1++],A0.2,A1.2
+       SETL    [D1Ar1++],A0.2,A1.2
+       BR      $LLongLoop
+       BZ      $Lexit
+!
+! Preamble to LongishLoop which generates 1*8 bytes per iteration (2 cycles)
+!
+$LLongishStub:
+       LSRS    D0Ar2,D1Ar3,#3
+       AND     D1Ar3,D1Ar3,#0x7
+       MOV     D1Ar5,D1Ar3
+       BEQ     $LByteStub
+       SUB     TXRPT,D0Ar2,#1
+       CMP     D1Ar3,#0
+$LLongishLoop:
+       SETL    [D1Ar1++],A0.2,A1.2
+       BR      $LLongishLoop
+       BZ      $Lexit
+!
+! This does a byte structured burst of up to 7 bytes
+!
+!      D1Ar1 should point to the location required
+!      D1Ar3 should be the remaining total byte count
+!      D1Ar5 should be burst size (<= D1Ar3)
+!
+$LByteStub:
+       SUBS    D1Ar3,D1Ar3,D1Ar5       ! Reduce count
+       ADD     D1Ar1,D1Ar1,D1Ar5       ! Advance pointer to end of area
+       MULW    D1Ar5,D1Ar5,#4          ! Scale to (1*4), (2*4), (3*4)
+       SUB     D1Ar5,D1Ar5,#(8*4)      ! Rebase to -(7*4), -(6*4), -(5*4), ...
+       MOV     A1.2,D1Ar5
+       SUB     PC,CPC1,A1.2            ! Jump into table below
+       SETB    [D1Ar1+#(-7)],A0.2
+       SETB    [D1Ar1+#(-6)],A0.2
+       SETB    [D1Ar1+#(-5)],A0.2
+       SETB    [D1Ar1+#(-4)],A0.2
+       SETB    [D1Ar1+#(-3)],A0.2
+       SETB    [D1Ar1+#(-2)],A0.2
+       SETB    [D1Ar1+#(-1)],A0.2
+!
+! Return if all data has been output, otherwise do $LLongStub
+!
+       BNZ     $LLongStub
+$Lexit:
+       MOV     PC,D1RtP
+        .size _memset,.-_memset
+
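The preamble above builds the 32-bit fill value (duplicated into A1.2 for the 64-bit SETL stores) by widening the byte with a 16-bit multiply and a shifted add. The same replication, sketched in C (editorial illustration, not part of the patch):

    #include <stdint.h>

    /* Replicate the fill byte across a 32-bit word, as MULW #0x0101
     * followed by the LSL #16 / ADD sequence does above. */
    static uint32_t memset_fill_word(unsigned char c)
    {
            uint32_t half = (uint32_t)c * 0x0101u;  /* byte in bits 0-15 */

            return half | (half << 16);             /* byte in bits 0-31 */
    }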
diff --git a/arch/metag/lib/modsi3.S b/arch/metag/lib/modsi3.S
new file mode 100644 (file)
index 0000000..210cfa8
--- /dev/null
@@ -0,0 +1,38 @@
+! Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+!               Imagination Technologies Ltd
+!
+! Integer modulus routines.
+!
+!!
+!! 32-bit modulus unsigned i/p - passed unsigned 32-bit numbers
+!!
+       .text
+       .global ___umodsi3
+       .type   ___umodsi3,function
+       .align  2
+___umodsi3:
+       MOV     D0FrT,D1RtP             ! Save original return address
+       CALLR   D1RtP,___udivsi3
+       MOV     D1RtP,D0FrT             ! Recover return address
+       MOV     D0Re0,D1Ar1             ! Return remainder
+       MOV     PC,D1RtP
+       .size   ___umodsi3,.-___umodsi3
+
+!!
+!! 32-bit modulus signed i/p - passed signed 32-bit numbers
+!!
+       .global ___modsi3
+       .type   ___modsi3,function
+       .align  2
+___modsi3:
+       MOV     D0FrT,D1RtP             ! Save original return address
+       MOV     A0.2,D1Ar1              ! Save A in A0.2
+       CALLR   D1RtP,___divsi3
+       MOV     D1RtP,D0FrT             ! Recover return address
+       MOV     D1Re0,A0.2              ! Recover A
+       MOV     D0Re0,D1Ar1             ! Return remainder
+       ORS     D1Re0,D1Re0,D1Re0       ! Was A negative?
+       NEG     D1Ar1,D1Ar1             ! Negate remainder
+       MOVMI   D0Re0,D1Ar1             ! Return neg remainder
+       MOV     PC, D1RtP
+       .size   ___modsi3,.-___modsi3
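Both helpers return the remainder of a truncated division, so the signed variant gives the remainder the sign of the dividend (the negation guarded by MOVMI above). A hedged C restatement of the intended semantics, not part of the patch:

    /* Reference semantics only; the real routines reuse ___udivsi3 /
     * ___divsi3 for the divide step rather than the C operators. */
    unsigned int umodsi3_ref(unsigned int a, unsigned int b)
    {
            return a % b;
    }

    int modsi3_ref(int a, int b)
    {
            return a - (a / b) * b;   /* remainder takes the sign of a */
    }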
diff --git a/arch/metag/lib/muldi3.S b/arch/metag/lib/muldi3.S
new file mode 100644 (file)
index 0000000..ee66ca8
--- /dev/null
@@ -0,0 +1,44 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit multiply routine.
+!
+
+!
+! 64-bit signed/unsigned multiply
+!
+! A = D1Ar1:D0Ar2 = a 2^48 + b 2^32 +  c 2^16 + d 2^0
+!
+! B = D1Ar3:D0Ar4 = w 2^48 + x 2^32 +  y 2^16 + z 2^0
+!
+       .text
+       .global ___muldi3
+       .type   ___muldi3,function
+
+___muldi3:
+       MULD    D1Re0,D1Ar1,D0Ar4       ! (a 2^48 + b 2^32)(y 2^16 + z 2^0)
+       MULD    D0Re0,D0Ar2,D1Ar3       ! (w 2^48 + x 2^32)(c 2^16 + d 2^0)
+       ADD     D1Re0,D1Re0,D0Re0
+
+       MULW    D0Re0,D0Ar2,D0Ar4       ! (d 2^0)  * (z 2^0)
+
+       RTDW    D0Ar2,D0Ar2
+       MULW    D0Ar6,D0Ar2,D0Ar4       ! (c 2^16)(z 2^0)
+       LSR     D1Ar5,D0Ar6,#16
+       LSL     D0Ar6,D0Ar6,#16
+       ADDS    D0Re0,D0Re0,D0Ar6
+       ADDCS   D1Re0,D1Re0,#1
+       RTDW    D0Ar4,D0Ar4
+       ADD     D1Re0,D1Re0,D1Ar5
+
+       MULW    D0Ar6,D0Ar2,D0Ar4       ! (c 2^16)(y 2^16)
+       ADD     D1Re0,D1Re0,D0Ar6
+
+       RTDW    D0Ar2,D0Ar2
+       MULW    D0Ar6,D0Ar2,D0Ar4       ! (d 2^0)(y 2^16)
+       LSR     D1Ar5,D0Ar6,#16
+       LSL     D0Ar6,D0Ar6,#16
+       ADDS    D0Re0,D0Re0,D0Ar6
+       ADD     D1Re0,D1Re0,D1Ar5
+       ADDCS   D1Re0,D1Re0,#1
+       MOV     PC, D1RtP
+       .size ___muldi3,.-___muldi3
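___muldi3 returns only the low 64 bits of the product, accumulating the cross products of the operands' 16-bit chunks named in the comments; any contribution that would land above bit 63 is simply dropped. A coarser but equivalent C sketch using 32-bit halves (editorial illustration, not part of the patch):

    #include <stdint.h>

    /* Low 64 bits of a * b; a_hi * b_hi only feeds bits >= 64, so it is
     * omitted, just as the assembly ignores overflow out of D1Re0. */
    uint64_t muldi3_ref(uint64_t a, uint64_t b)
    {
            uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32);
            uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
            uint64_t lo  = (uint64_t)a_lo * b_lo;
            uint64_t mid = (uint64_t)a_lo * b_hi + (uint64_t)a_hi * b_lo;

            return lo + (mid << 32);
    }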
diff --git a/arch/metag/lib/ucmpdi2.S b/arch/metag/lib/ucmpdi2.S
new file mode 100644 (file)
index 0000000..6f3347f
--- /dev/null
@@ -0,0 +1,27 @@
+! Copyright (C) 2012 by Imagination Technologies Ltd.
+!
+! 64-bit unsigned compare routine.
+!
+
+       .text
+       .global ___ucmpdi2
+       .type   ___ucmpdi2,function
+
+!         low    high
+! u64 a  (D0Ar2, D1Ar1)
+! u64 b  (D0Ar4, D1Ar3)
+___ucmpdi2:
+       ! start at 1 (equal) and conditionally increment or decrement
+       MOV     D0Re0,#1
+
+       ! high words
+       CMP     D1Ar1,D1Ar3
+       ! or if equal, low words
+       CMPEQ   D0Ar2,D0Ar4
+
+       ! unsigned compare
+       SUBLO   D0Re0,D0Re0,#1
+       ADDHI   D0Re0,D0Re0,#1
+
+       MOV     PC,D1RtP
+       .size ___ucmpdi2,.-___ucmpdi2
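The routine follows the usual libgcc convention for __ucmpdi2: return 0, 1 or 2 for a < b, a == b, a > b, which the SUBLO/ADDHI pair realises around the initial value 1. Restated in C for reference (editorial illustration, not part of the patch):

    /* Reference semantics of the 0/1/2 comparison convention. */
    int ucmpdi2_ref(unsigned long long a, unsigned long long b)
    {
            if (a < b)
                    return 0;
            if (a > b)
                    return 2;
            return 1;
    }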
diff --git a/arch/metag/lib/usercopy.c b/arch/metag/lib/usercopy.c
new file mode 100644 (file)
index 0000000..b3ebfe9
--- /dev/null
@@ -0,0 +1,1354 @@
+/*
+ * User address space access functions.
+ * The non-inlined parts of asm-metag/uaccess.h are here.
+ *
+ * Copyright (C) 2006, Imagination Technologies.
+ * Copyright (C) 2000, Axis Communications AB.
+ *
+ * Written by Hans-Peter Nilsson.
+ * Pieces used from memcpy, originally by Kenny Ranerup a long time ago.
+ * Modified for Meta by Will Newton.
+ */
+
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <asm/cache.h>                 /* def of L1_CACHE_BYTES */
+
+#define USE_RAPF
+#define RAPF_MIN_BUF_SIZE      (3*L1_CACHE_BYTES)
+
+
+/* The "double write" in this code is because the Meta will not fault
+ * immediately unless the memory pipe is forced to by e.g. a data stall or
+ * another memory op. The second write should be discarded by the write
+ * combiner so should have virtually no cost.
+ */
+
+#define __asm_copy_user_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       asm volatile (                                           \
+               COPY                                             \
+               "1:\n"                                           \
+               "       .section .fixup,\"ax\"\n"                \
+               "       MOV D1Ar1,#0\n"                          \
+               FIXUP                                            \
+               "       MOVT    D1Ar1,#HI(1b)\n"                 \
+               "       JUMP    D1Ar1,#LO(1b)\n"                 \
+               "       .previous\n"                             \
+               "       .section __ex_table,\"a\"\n"             \
+               TENTRY                                           \
+               "       .previous\n"                             \
+               : "=r" (to), "=r" (from), "=r" (ret)             \
+               : "0" (to), "1" (from), "2" (ret)                \
+               : "D1Ar1", "memory")
+
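+/* Editorial note (not part of the original patch): in the size-specific
+ * __asm_copy_to_user_* macros below, COPY holds the GET/SET body, with a
+ * numbered label on the second, faultable write; FIXUP is appended to the
+ * .fixup section to add the size of the failed access to the residual
+ * count in ret before jumping back to label 1; and TENTRY supplies the
+ * matching "fault address, fixup address" pair for __ex_table.
+ */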
+
+#define __asm_copy_to_user_1(to, from, ret)    \
+       __asm_copy_user_cont(to, from, ret,     \
+               "       GETB D1Ar1,[%1++]\n"    \
+               "       SETB [%0],D1Ar1\n"      \
+               "2:     SETB [%0++],D1Ar1\n",   \
+               "3:     ADD  %2,%2,#1\n",       \
+               "       .long 2b,3b\n")
+
+#define __asm_copy_to_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_user_cont(to, from, ret,             \
+               "       GETW D1Ar1,[%1++]\n"            \
+               "       SETW [%0],D1Ar1\n"              \
+               "2:     SETW [%0++],D1Ar1\n" COPY,      \
+               "3:     ADD  %2,%2,#2\n" FIXUP,         \
+               "       .long 2b,3b\n" TENTRY)
+
+#define __asm_copy_to_user_2(to, from, ret) \
+       __asm_copy_to_user_2x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_3(to, from, ret) \
+       __asm_copy_to_user_2x_cont(to, from, ret,       \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "       SETB [%0],D1Ar1\n"              \
+               "4:     SETB [%0++],D1Ar1\n",           \
+               "5:     ADD  %2,%2,#1\n",               \
+               "       .long 4b,5b\n")
+
+#define __asm_copy_to_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_user_cont(to, from, ret,             \
+               "       GETD D1Ar1,[%1++]\n"            \
+               "       SETD [%0],D1Ar1\n"              \
+               "2:     SETD [%0++],D1Ar1\n" COPY,      \
+               "3:     ADD  %2,%2,#4\n" FIXUP,         \
+               "       .long 2b,3b\n" TENTRY)
+
+#define __asm_copy_to_user_4(to, from, ret) \
+       __asm_copy_to_user_4x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_5(to, from, ret) \
+       __asm_copy_to_user_4x_cont(to, from, ret,       \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "       SETB [%0],D1Ar1\n"              \
+               "4:     SETB [%0++],D1Ar1\n",           \
+               "5:     ADD  %2,%2,#1\n",               \
+               "       .long 4b,5b\n")
+
+#define __asm_copy_to_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_to_user_4x_cont(to, from, ret,       \
+               "       GETW D1Ar1,[%1++]\n"            \
+               "       SETW [%0],D1Ar1\n"              \
+               "4:     SETW [%0++],D1Ar1\n" COPY,      \
+               "5:     ADD  %2,%2,#2\n" FIXUP,         \
+               "       .long 4b,5b\n" TENTRY)
+
+#define __asm_copy_to_user_6(to, from, ret) \
+       __asm_copy_to_user_6x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_7(to, from, ret) \
+       __asm_copy_to_user_6x_cont(to, from, ret,       \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "       SETB [%0],D1Ar1\n"              \
+               "6:     SETB [%0++],D1Ar1\n",           \
+               "7:     ADD  %2,%2,#1\n",               \
+               "       .long 6b,7b\n")
+
+#define __asm_copy_to_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_to_user_4x_cont(to, from, ret,       \
+               "       GETD D1Ar1,[%1++]\n"            \
+               "       SETD [%0],D1Ar1\n"              \
+               "4:     SETD [%0++],D1Ar1\n" COPY,      \
+               "5:     ADD  %2,%2,#4\n"  FIXUP,        \
+               "       .long 4b,5b\n" TENTRY)
+
+#define __asm_copy_to_user_8(to, from, ret) \
+       __asm_copy_to_user_8x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_9(to, from, ret) \
+       __asm_copy_to_user_8x_cont(to, from, ret,       \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "       SETB [%0],D1Ar1\n"              \
+               "6:     SETB [%0++],D1Ar1\n",           \
+               "7:     ADD  %2,%2,#1\n",               \
+               "       .long 6b,7b\n")
+
+#define __asm_copy_to_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_to_user_8x_cont(to, from, ret,       \
+               "       GETW D1Ar1,[%1++]\n"            \
+               "       SETW [%0],D1Ar1\n"              \
+               "6:     SETW [%0++],D1Ar1\n" COPY,      \
+               "7:     ADD  %2,%2,#2\n" FIXUP,         \
+               "       .long 6b,7b\n" TENTRY)
+
+#define __asm_copy_to_user_10(to, from, ret) \
+       __asm_copy_to_user_10x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_11(to, from, ret) \
+       __asm_copy_to_user_10x_cont(to, from, ret,      \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "       SETB [%0],D1Ar1\n"              \
+               "8:     SETB [%0++],D1Ar1\n",           \
+               "9:     ADD  %2,%2,#1\n",               \
+               "       .long 8b,9b\n")
+
+#define __asm_copy_to_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_to_user_8x_cont(to, from, ret,       \
+               "       GETD D1Ar1,[%1++]\n"            \
+               "       SETD [%0],D1Ar1\n"              \
+               "6:     SETD [%0++],D1Ar1\n" COPY,      \
+               "7:     ADD  %2,%2,#4\n" FIXUP,         \
+               "       .long 6b,7b\n" TENTRY)
+#define __asm_copy_to_user_12(to, from, ret) \
+       __asm_copy_to_user_12x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_13(to, from, ret) \
+       __asm_copy_to_user_12x_cont(to, from, ret,      \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "       SETB [%0],D1Ar1\n"              \
+               "8:     SETB [%0++],D1Ar1\n",           \
+               "9:     ADD  %2,%2,#1\n",               \
+               "       .long 8b,9b\n")
+
+#define __asm_copy_to_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_to_user_12x_cont(to, from, ret,      \
+               "       GETW D1Ar1,[%1++]\n"            \
+               "       SETW [%0],D1Ar1\n"              \
+               "8:     SETW [%0++],D1Ar1\n" COPY,      \
+               "9:     ADD  %2,%2,#2\n" FIXUP,         \
+               "       .long 8b,9b\n" TENTRY)
+
+#define __asm_copy_to_user_14(to, from, ret) \
+       __asm_copy_to_user_14x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_15(to, from, ret) \
+       __asm_copy_to_user_14x_cont(to, from, ret,      \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "       SETB [%0],D1Ar1\n"              \
+               "10:    SETB [%0++],D1Ar1\n",           \
+               "11:    ADD  %2,%2,#1\n",               \
+               "       .long 10b,11b\n")
+
+#define __asm_copy_to_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_to_user_12x_cont(to, from, ret,      \
+               "       GETD D1Ar1,[%1++]\n"            \
+               "       SETD [%0],D1Ar1\n"              \
+               "8:     SETD [%0++],D1Ar1\n" COPY,      \
+               "9:     ADD  %2,%2,#4\n" FIXUP,         \
+               "       .long 8b,9b\n" TENTRY)
+
+#define __asm_copy_to_user_16(to, from, ret) \
+               __asm_copy_to_user_16x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_to_user_8x64(to, from, ret) \
+       asm volatile (                                  \
+               "       GETL D0Ar2,D1Ar1,[%1++]\n"      \
+               "       SETL [%0],D0Ar2,D1Ar1\n"        \
+               "2:     SETL [%0++],D0Ar2,D1Ar1\n"      \
+               "1:\n"                                  \
+               "       .section .fixup,\"ax\"\n"       \
+               "3:     ADD  %2,%2,#8\n"                \
+               "       MOVT    D0Ar2,#HI(1b)\n"        \
+               "       JUMP    D0Ar2,#LO(1b)\n"        \
+               "       .previous\n"                    \
+               "       .section __ex_table,\"a\"\n"    \
+               "       .long 2b,3b\n"                  \
+               "       .previous\n"                    \
+               : "=r" (to), "=r" (from), "=r" (ret)    \
+               : "0" (to), "1" (from), "2" (ret)       \
+               : "D1Ar1", "D0Ar2", "memory")
+
+/*
+ *     optimized copying loop using RAPF when 64 bit aligned
+ *
+ *     n               will be automatically decremented inside the loop
+ *     ret             will be left intact. If an error occurs we rewind
+ *                     so that the original non-optimized code fills in
+ *                     this value correctly.
+ *
+ *     on fault:
+ *             >       n will hold total number of uncopied bytes
+ *
+ *             >       {'to','from'} will be rewound so that
+ *                     the non-optimized code will do the proper fix up
+ *
+ *     DCACHE drops the cacheline which helps in reducing cache
+ *     pollution.
+ *
+ *     We introduce an extra SETL at the end of the loop to
+ *     ensure we don't fall off the loop before we catch all
+ *     errors.
+ *
+ *     NOTICE:
+ *             LSM_STEP in TXSTATUS must be cleared in fix up code.
+ *             since we're using M{S,G}ETL, a fault might happen at
+ *             any address in the middle of M{S,G}ETL causing
+ *             the value of LSM_STEP to be incorrect which can
+ *             cause subsequent use of M{S,G}ET{L,D} to go wrong.
+ *             ie: if LSM_STEP was 1 when a fault occurs, the
+ *             next call to M{S,G}ET{L,D} will skip the first
+ *             copy/getting as it thinks that the first one has already
+ *             been done.
+ *
+ */
+#define __asm_copy_user_64bit_rapf_loop(                               \
+               to, from, ret, n, id, FIXUP)                            \
+       asm volatile (                                                  \
+               ".balign 8\n"                                           \
+               "MOV    RAPF, %1\n"                                     \
+               "MSETL  [A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n"    \
+               "MOV    D0Ar6, #0\n"                                    \
+               "LSR    D1Ar5, %3, #6\n"                                \
+               "SUB    TXRPT, D1Ar5, #2\n"                             \
+               "MOV    RAPF, %1\n"                                     \
+               "$Lloop"id":\n"                                         \
+               "ADD    RAPF, %1, #64\n"                                \
+               "21:\n"                                                 \
+               "MGETL  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "22:\n"                                                 \
+               "MSETL  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %3, %3, #32\n"                                  \
+               "23:\n"                                                 \
+               "MGETL  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "24:\n"                                                 \
+               "MSETL  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %3, %3, #32\n"                                  \
+               "DCACHE [%1+#-64], D0Ar6\n"                             \
+               "BR     $Lloop"id"\n"                                   \
+                                                                       \
+               "MOV    RAPF, %1\n"                                     \
+               "25:\n"                                                 \
+               "MGETL  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "26:\n"                                                 \
+               "MSETL  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %3, %3, #32\n"                                  \
+               "27:\n"                                                 \
+               "MGETL  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "28:\n"                                                 \
+               "MSETL  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %0, %0, #8\n"                                   \
+               "29:\n"                                                 \
+               "SETL   [%0++], D0.7, D1.7\n"                           \
+               "SUB    %3, %3, #32\n"                                  \
+               "1:"                                                    \
+               "DCACHE [%1+#-64], D0Ar6\n"                             \
+               "GETL    D0Ar6, D1Ar5, [A0StP+#-40]\n"                  \
+               "GETL    D0FrT, D1RtP, [A0StP+#-32]\n"                  \
+               "GETL    D0.5, D1.5, [A0StP+#-24]\n"                    \
+               "GETL    D0.6, D1.6, [A0StP+#-16]\n"                    \
+               "GETL    D0.7, D1.7, [A0StP+#-8]\n"                     \
+               "SUB A0StP, A0StP, #40\n"                               \
+               "       .section .fixup,\"ax\"\n"                       \
+               "4:\n"                                                  \
+               "       ADD     %0, %0, #8\n"                           \
+               "3:\n"                                                  \
+               "       MOV     D0Ar2, TXSTATUS\n"                      \
+               "       MOV     D1Ar1, TXSTATUS\n"                      \
+               "       AND     D1Ar1, D1Ar1, #0xFFFFF8FF\n"            \
+               "       MOV     TXSTATUS, D1Ar1\n"                      \
+                       FIXUP                                           \
+               "       MOVT    D0Ar2,#HI(1b)\n"                        \
+               "       JUMP    D0Ar2,#LO(1b)\n"                        \
+               "       .previous\n"                                    \
+               "       .section __ex_table,\"a\"\n"                    \
+               "       .long 21b,3b\n"                                 \
+               "       .long 22b,3b\n"                                 \
+               "       .long 23b,3b\n"                                 \
+               "       .long 24b,3b\n"                                 \
+               "       .long 25b,3b\n"                                 \
+               "       .long 26b,3b\n"                                 \
+               "       .long 27b,3b\n"                                 \
+               "       .long 28b,3b\n"                                 \
+               "       .long 29b,4b\n"                                 \
+               "       .previous\n"                                    \
+               : "=r" (to), "=r" (from), "=r" (ret), "=d" (n)          \
+               : "0" (to), "1" (from), "2" (ret), "3" (n)              \
+               : "D1Ar1", "D0Ar2", "memory")
+
+/*     rewind 'to' and 'from'  pointers when a fault occurs
+ *
+ *     Rationale:
+ *             A fault always occurs on writing to user buffer. A fault
+ *             is at a single address, so we need to rewind by only 8
+ *             bytes.
+ *             Since we do a complete read from kernel buffer before
+ *             writing, we need to rewind it also. The amount to be
+ *             rewound equals the number of faulty writes in MSETL
+ *             which is: [4 - (LSM_STEP-1)]*8
+ *             LSM_STEP is bits 10:8 in TXSTATUS which is already read
+ *             and stored in D0Ar2
+ *
+ *             NOTE: If a fault occurs at the last operation in M{G,S}ETL
+ *                     LSM_STEP will be 0. ie: we do 4 writes in our case, if
+ *                     a fault happens at the 4th write, LSM_STEP will be 0
+ *                     instead of 4. The code copes with that.
+ *
+ *             n is updated by the number of successful writes, which is:
+ *             n = n - (LSM_STEP-1)*8
+ */
+#define __asm_copy_to_user_64bit_rapf_loop(to, from, ret, n, id)\
+       __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id,           \
+               "LSR    D0Ar2, D0Ar2, #8\n"                             \
+               "AND    D0Ar2, D0Ar2, #0x7\n"                           \
+               "ADDZ   D0Ar2, D0Ar2, #4\n"                             \
+               "SUB    D0Ar2, D0Ar2, #1\n"                             \
+               "MOV    D1Ar1, #4\n"                                    \
+               "SUB    D0Ar2, D1Ar1, D0Ar2\n"                          \
+               "LSL    D0Ar2, D0Ar2, #3\n"                             \
+               "LSL    D1Ar1, D1Ar1, #3\n"                             \
+               "SUB    D1Ar1, D1Ar1, D0Ar2\n"                          \
+               "SUB    %0, %0, #8\n"                                   \
+               "SUB    %1,     %1,D0Ar2\n"                             \
+               "SUB    %3, %3, D1Ar1\n")
+
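+/* Editorial worked example (not part of the original patch): suppose a
+ * fault hits the second SETL of an MSETL, so LSM_STEP reads back as 2.
+ * The fixup above computes 1 successful write, rewinds 'from' by
+ * (4 - 1) * 8 = 24 bytes that were read but never written, steps 'to'
+ * back over the 8 bytes of the trailing SETL, and reduces n by
+ * (2 - 1) * 8 = 8 bytes actually copied.
+ */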
+/*
+ *     optimized copying loop using RAPF when 32 bit aligned
+ *
+ *     n               will be automatically decremented inside the loop
+ *     ret             will be left intact. If an error occurs we rewind
+ *                     so that the original non-optimized code fills in
+ *                     this value correctly.
+ *
+ *     on fault:
+ *             >       n will hold total number of uncopied bytes
+ *
+ *             >       {'to','from'} will be rewound so that
+ *                     the non-optimized code will do the proper fix up
+ *
+ *     DCACHE drops the cacheline which helps in reducing cache
+ *     pollution.
+ *
+ *     We introduce an extra SETD at the end of the loop to
+ *     ensure we don't fall off the loop before we catch all
+ *     errors.
+ *
+ *     NOTICE:
+ *             LSM_STEP in TXSTATUS must be cleared in fix up code.
+ *             since we're using M{S,G}ETL, a fault might happen at
+ *             any address in the middle of M{S,G}ETL causing
+ *             the value of LSM_STEP to be incorrect which can
+ *             cause subsequent use of M{S,G}ET{L,D} to go wrong.
+ *             ie: if LSM_STEP was 1 when a fault occurs, the
+ *             next call to M{S,G}ET{L,D} will skip the first
+ *             copy/getting as it thinks that the first one has already
+ *             been done.
+ *
+ */
+#define __asm_copy_user_32bit_rapf_loop(                               \
+                       to,     from, ret, n, id, FIXUP)                \
+       asm volatile (                                                  \
+               ".balign 8\n"                                           \
+               "MOV    RAPF, %1\n"                                     \
+               "MSETL  [A0StP++], D0Ar6, D0FrT, D0.5, D0.6, D0.7\n"    \
+               "MOV    D0Ar6, #0\n"                                    \
+               "LSR    D1Ar5, %3, #6\n"                                \
+               "SUB    TXRPT, D1Ar5, #2\n"                             \
+               "MOV    RAPF, %1\n"                                     \
+       "$Lloop"id":\n"                                                 \
+               "ADD    RAPF, %1, #64\n"                                \
+               "21:\n"                                                 \
+               "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "22:\n"                                                 \
+               "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %3, %3, #16\n"                                  \
+               "23:\n"                                                 \
+               "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "24:\n"                                                 \
+               "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %3, %3, #16\n"                                  \
+               "25:\n"                                                 \
+               "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "26:\n"                                                 \
+               "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %3, %3, #16\n"                                  \
+               "27:\n"                                                 \
+               "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "28:\n"                                                 \
+               "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %3, %3, #16\n"                                  \
+               "DCACHE [%1+#-64], D0Ar6\n"                             \
+               "BR     $Lloop"id"\n"                                   \
+                                                                       \
+               "MOV    RAPF, %1\n"                                     \
+               "29:\n"                                                 \
+               "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "30:\n"                                                 \
+               "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %3, %3, #16\n"                                  \
+               "31:\n"                                                 \
+               "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "32:\n"                                                 \
+               "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %3, %3, #16\n"                                  \
+               "33:\n"                                                 \
+               "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "34:\n"                                                 \
+               "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %3, %3, #16\n"                                  \
+               "35:\n"                                                 \
+               "MGETD  D0FrT, D0.5, D0.6, D0.7, [%1++]\n"              \
+               "36:\n"                                                 \
+               "MSETD  [%0++], D0FrT, D0.5, D0.6, D0.7\n"              \
+               "SUB    %0, %0, #4\n"                                   \
+               "37:\n"                                                 \
+               "SETD   [%0++], D0.7\n"                                 \
+               "SUB    %3, %3, #16\n"                                  \
+               "1:"                                                    \
+               "DCACHE [%1+#-64], D0Ar6\n"                             \
+               "GETL    D0Ar6, D1Ar5, [A0StP+#-40]\n"                  \
+               "GETL    D0FrT, D1RtP, [A0StP+#-32]\n"                  \
+               "GETL    D0.5, D1.5, [A0StP+#-24]\n"                    \
+               "GETL    D0.6, D1.6, [A0StP+#-16]\n"                    \
+               "GETL    D0.7, D1.7, [A0StP+#-8]\n"                     \
+               "SUB A0StP, A0StP, #40\n"                               \
+               "       .section .fixup,\"ax\"\n"                       \
+               "4:\n"                                                  \
+               "       ADD             %0, %0, #4\n"                   \
+               "3:\n"                                                  \
+               "       MOV     D0Ar2, TXSTATUS\n"                      \
+               "       MOV     D1Ar1, TXSTATUS\n"                      \
+               "       AND     D1Ar1, D1Ar1, #0xFFFFF8FF\n"            \
+               "       MOV     TXSTATUS, D1Ar1\n"                      \
+                       FIXUP                                           \
+               "       MOVT    D0Ar2,#HI(1b)\n"                        \
+               "       JUMP    D0Ar2,#LO(1b)\n"                        \
+               "       .previous\n"                                    \
+               "       .section __ex_table,\"a\"\n"                    \
+               "       .long 21b,3b\n"                                 \
+               "       .long 22b,3b\n"                                 \
+               "       .long 23b,3b\n"                                 \
+               "       .long 24b,3b\n"                                 \
+               "       .long 25b,3b\n"                                 \
+               "       .long 26b,3b\n"                                 \
+               "       .long 27b,3b\n"                                 \
+               "       .long 28b,3b\n"                                 \
+               "       .long 29b,3b\n"                                 \
+               "       .long 30b,3b\n"                                 \
+               "       .long 31b,3b\n"                                 \
+               "       .long 32b,3b\n"                                 \
+               "       .long 33b,3b\n"                                 \
+               "       .long 34b,3b\n"                                 \
+               "       .long 35b,3b\n"                                 \
+               "       .long 36b,3b\n"                                 \
+               "       .long 37b,4b\n"                                 \
+               "       .previous\n"                                    \
+               : "=r" (to), "=r" (from), "=r" (ret), "=d" (n)          \
+               : "0" (to), "1" (from), "2" (ret), "3" (n)              \
+               : "D1Ar1", "D0Ar2", "memory")
+
+/*     rewind 'to' and 'from'  pointers when a fault occurs
+ *
+ *     Rationale:
+ *             A fault always occurs on writing to user buffer. A fault
+ *             is at a single address, so we need to rewind by only 4
+ *             bytes.
+ *             Since we do a complete read from kernel buffer before
+ *             writing, we need to rewind it also. The amount to be
+ *             rewound equals the number of faulty writes in MSETD
+ *             which is: [4 - (LSM_STEP-1)]*4
+ *             LSM_STEP is bits 10:8 in TXSTATUS which is already read
+ *             and stored in D0Ar2
+ *
+ *             NOTE: If a fault occurs at the last operation in M{G,S}ETL
+ *                     LSM_STEP will be 0. ie: we do 4 writes in our case, if
+ *                     a fault happens at the 4th write, LSM_STEP will be 0
+ *                     instead of 4. The code copes with that.
+ *
+ *             n is updated by the number of successful writes, which is:
+ *             n = n - (LSM_STEP-1)*4
+ */
+#define __asm_copy_to_user_32bit_rapf_loop(to, from, ret, n, id)\
+       __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id,           \
+               "LSR    D0Ar2, D0Ar2, #8\n"                             \
+               "AND    D0Ar2, D0Ar2, #0x7\n"                           \
+               "ADDZ   D0Ar2, D0Ar2, #4\n"                             \
+               "SUB    D0Ar2, D0Ar2, #1\n"                             \
+               "MOV    D1Ar1, #4\n"                                    \
+               "SUB    D0Ar2, D1Ar1, D0Ar2\n"                          \
+               "LSL    D0Ar2, D0Ar2, #2\n"                             \
+               "LSL    D1Ar1, D1Ar1, #2\n"                             \
+               "SUB    D1Ar1, D1Ar1, D0Ar2\n"                          \
+               "SUB    %0, %0, #4\n"                                   \
+               "SUB    %1,     %1,     D0Ar2\n"                        \
+               "SUB    %3, %3, D1Ar1\n")
+
+unsigned long __copy_user(void __user *pdst, const void *psrc,
+                         unsigned long n)
+{
+       register char __user *dst asm ("A0.2") = pdst;
+       register const char *src asm ("A1.2") = psrc;
+       unsigned long retn = 0;
+
+       if (n == 0)
+               return 0;
+
+       if ((unsigned long) src & 1) {
+               __asm_copy_to_user_1(dst, src, retn);
+               n--;
+       }
+       if ((unsigned long) dst & 1) {
+               /* Worst case - byte copy */
+               while (n > 0) {
+                       __asm_copy_to_user_1(dst, src, retn);
+                       n--;
+               }
+       }
+       if (((unsigned long) src & 2) && n >= 2) {
+               __asm_copy_to_user_2(dst, src, retn);
+               n -= 2;
+       }
+       if ((unsigned long) dst & 2) {
+               /* Second worst case - word copy */
+               while (n >= 2) {
+                       __asm_copy_to_user_2(dst, src, retn);
+                       n -= 2;
+               }
+       }
+
+#ifdef USE_RAPF
+       /* 64 bit copy loop */
+       if (!(((unsigned long) src | (__force unsigned long) dst) & 7)) {
+               if (n >= RAPF_MIN_BUF_SIZE) {
+                       /* copy user using 64 bit rapf copy */
+                       __asm_copy_to_user_64bit_rapf_loop(dst, src, retn,
+                                                       n, "64cu");
+               }
+               while (n >= 8) {
+                       __asm_copy_to_user_8x64(dst, src, retn);
+                       n -= 8;
+               }
+       }
+       if (n >= RAPF_MIN_BUF_SIZE) {
+               /* copy user using 32 bit rapf copy */
+               __asm_copy_to_user_32bit_rapf_loop(dst, src, retn, n, "32cu");
+       }
+#else
+       /* 64 bit copy loop */
+       if (!(((unsigned long) src | (__force unsigned long) dst) & 7)) {
+               while (n >= 8) {
+                       __asm_copy_to_user_8x64(dst, src, retn);
+                       n -= 8;
+               }
+       }
+#endif
+
+       while (n >= 16) {
+               __asm_copy_to_user_16(dst, src, retn);
+               n -= 16;
+       }
+
+       while (n >= 4) {
+               __asm_copy_to_user_4(dst, src, retn);
+               n -= 4;
+       }
+
+       switch (n) {
+       case 0:
+               break;
+       case 1:
+               __asm_copy_to_user_1(dst, src, retn);
+               break;
+       case 2:
+               __asm_copy_to_user_2(dst, src, retn);
+               break;
+       case 3:
+               __asm_copy_to_user_3(dst, src, retn);
+               break;
+       }
+
+       return retn;
+}
+EXPORT_SYMBOL(__copy_user);
+
+#define __asm_copy_from_user_1(to, from, ret) \
+       __asm_copy_user_cont(to, from, ret,     \
+               "       GETB D1Ar1,[%1++]\n"    \
+               "2:     SETB [%0++],D1Ar1\n",   \
+               "3:     ADD  %2,%2,#1\n"        \
+               "       SETB [%0++],D1Ar1\n",   \
+               "       .long 2b,3b\n")
+
+#define __asm_copy_from_user_2x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_user_cont(to, from, ret,             \
+               "       GETW D1Ar1,[%1++]\n"            \
+               "2:     SETW [%0++],D1Ar1\n" COPY,      \
+               "3:     ADD  %2,%2,#2\n"                \
+               "       SETW [%0++],D1Ar1\n" FIXUP,     \
+               "       .long 2b,3b\n" TENTRY)
+
+#define __asm_copy_from_user_2(to, from, ret) \
+       __asm_copy_from_user_2x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_3(to, from, ret)          \
+       __asm_copy_from_user_2x_cont(to, from, ret,     \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "4:     SETB [%0++],D1Ar1\n",           \
+               "5:     ADD  %2,%2,#1\n"                \
+               "       SETB [%0++],D1Ar1\n",           \
+               "       .long 4b,5b\n")
+
+#define __asm_copy_from_user_4x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_user_cont(to, from, ret,             \
+               "       GETD D1Ar1,[%1++]\n"            \
+               "2:     SETD [%0++],D1Ar1\n" COPY,      \
+               "3:     ADD  %2,%2,#4\n"                \
+               "       SETD [%0++],D1Ar1\n" FIXUP,     \
+               "       .long 2b,3b\n" TENTRY)
+
+#define __asm_copy_from_user_4(to, from, ret) \
+       __asm_copy_from_user_4x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_5(to, from, ret) \
+       __asm_copy_from_user_4x_cont(to, from, ret,     \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "4:     SETB [%0++],D1Ar1\n",           \
+               "5:     ADD  %2,%2,#1\n"                \
+               "       SETB [%0++],D1Ar1\n",           \
+               "       .long 4b,5b\n")
+
+#define __asm_copy_from_user_6x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_from_user_4x_cont(to, from, ret,     \
+               "       GETW D1Ar1,[%1++]\n"            \
+               "4:     SETW [%0++],D1Ar1\n" COPY,      \
+               "5:     ADD  %2,%2,#2\n"                \
+               "       SETW [%0++],D1Ar1\n" FIXUP,     \
+               "       .long 4b,5b\n" TENTRY)
+
+#define __asm_copy_from_user_6(to, from, ret) \
+       __asm_copy_from_user_6x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_7(to, from, ret) \
+       __asm_copy_from_user_6x_cont(to, from, ret,     \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "6:     SETB [%0++],D1Ar1\n",           \
+               "7:     ADD  %2,%2,#1\n"                \
+               "       SETB [%0++],D1Ar1\n",           \
+               "       .long 6b,7b\n")
+
+#define __asm_copy_from_user_8x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_from_user_4x_cont(to, from, ret,     \
+               "       GETD D1Ar1,[%1++]\n"            \
+               "4:     SETD [%0++],D1Ar1\n" COPY,      \
+               "5:     ADD  %2,%2,#4\n"                        \
+               "       SETD [%0++],D1Ar1\n" FIXUP,             \
+               "       .long 4b,5b\n" TENTRY)
+
+#define __asm_copy_from_user_8(to, from, ret) \
+       __asm_copy_from_user_8x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_9(to, from, ret) \
+       __asm_copy_from_user_8x_cont(to, from, ret,     \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "6:     SETB [%0++],D1Ar1\n",           \
+               "7:     ADD  %2,%2,#1\n"                \
+               "       SETB [%0++],D1Ar1\n",           \
+               "       .long 6b,7b\n")
+
+#define __asm_copy_from_user_10x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_from_user_8x_cont(to, from, ret,     \
+               "       GETW D1Ar1,[%1++]\n"            \
+               "6:     SETW [%0++],D1Ar1\n" COPY,      \
+               "7:     ADD  %2,%2,#2\n"                \
+               "       SETW [%0++],D1Ar1\n" FIXUP,     \
+               "       .long 6b,7b\n" TENTRY)
+
+#define __asm_copy_from_user_10(to, from, ret) \
+       __asm_copy_from_user_10x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_11(to, from, ret)         \
+       __asm_copy_from_user_10x_cont(to, from, ret,    \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "8:     SETB [%0++],D1Ar1\n",           \
+               "9:     ADD  %2,%2,#1\n"                \
+               "       SETB [%0++],D1Ar1\n",           \
+               "       .long 8b,9b\n")
+
+#define __asm_copy_from_user_12x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_from_user_8x_cont(to, from, ret,     \
+               "       GETD D1Ar1,[%1++]\n"            \
+               "6:     SETD [%0++],D1Ar1\n" COPY,      \
+               "7:     ADD  %2,%2,#4\n"                \
+               "       SETD [%0++],D1Ar1\n" FIXUP,     \
+               "       .long 6b,7b\n" TENTRY)
+
+#define __asm_copy_from_user_12(to, from, ret) \
+       __asm_copy_from_user_12x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_13(to, from, ret) \
+       __asm_copy_from_user_12x_cont(to, from, ret,    \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "8:     SETB [%0++],D1Ar1\n",           \
+               "9:     ADD  %2,%2,#1\n"                \
+               "       SETB [%0++],D1Ar1\n",           \
+               "       .long 8b,9b\n")
+
+#define __asm_copy_from_user_14x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_from_user_12x_cont(to, from, ret,    \
+               "       GETW D1Ar1,[%1++]\n"            \
+               "8:     SETW [%0++],D1Ar1\n" COPY,      \
+               "9:     ADD  %2,%2,#2\n"                \
+               "       SETW [%0++],D1Ar1\n" FIXUP,     \
+               "       .long 8b,9b\n" TENTRY)
+
+#define __asm_copy_from_user_14(to, from, ret) \
+       __asm_copy_from_user_14x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_15(to, from, ret) \
+       __asm_copy_from_user_14x_cont(to, from, ret,    \
+               "       GETB D1Ar1,[%1++]\n"            \
+               "10:    SETB [%0++],D1Ar1\n",           \
+               "11:    ADD  %2,%2,#1\n"                \
+               "       SETB [%0++],D1Ar1\n",           \
+               "       .long 10b,11b\n")
+
+#define __asm_copy_from_user_16x_cont(to, from, ret, COPY, FIXUP, TENTRY) \
+       __asm_copy_from_user_12x_cont(to, from, ret,    \
+               "       GETD D1Ar1,[%1++]\n"            \
+               "8:     SETD [%0++],D1Ar1\n" COPY,      \
+               "9:     ADD  %2,%2,#4\n"                \
+               "       SETD [%0++],D1Ar1\n" FIXUP,     \
+               "       .long 8b,9b\n" TENTRY)
+
+#define __asm_copy_from_user_16(to, from, ret) \
+       __asm_copy_from_user_16x_cont(to, from, ret, "", "", "")
+
+#define __asm_copy_from_user_8x64(to, from, ret) \
+       asm volatile (                          \
+               "       GETL D0Ar2,D1Ar1,[%1++]\n"      \
+               "2:     SETL [%0++],D0Ar2,D1Ar1\n"      \
+               "1:\n"                                  \
+               "       .section .fixup,\"ax\"\n"       \
+               "       MOV D1Ar1,#0\n"                 \
+               "       MOV D0Ar2,#0\n"                 \
+               "3:     ADD  %2,%2,#8\n"                \
+               "       SETL [%0++],D0Ar2,D1Ar1\n"      \
+               "       MOVT    D0Ar2,#HI(1b)\n"        \
+               "       JUMP    D0Ar2,#LO(1b)\n"        \
+               "       .previous\n"                    \
+               "       .section __ex_table,\"a\"\n"    \
+               "       .long 2b,3b\n"                  \
+               "       .previous\n"                    \
+               : "=a" (to), "=r" (from), "=r" (ret)    \
+               : "0" (to), "1" (from), "2" (ret)       \
+               : "D1Ar1", "D0Ar2", "memory")
+
+/*     rewind 'from' pointer when a fault occurs
+ *
+ *     Rationale:
+ *             A fault occurs while reading from user buffer, which is the
+ *             source. Since the fault is at a single address, we only
+ *             need to rewind by 8 bytes.
+ *             Since we don't write to kernel buffer until we read first,
+ *             the kernel buffer is at the right state and needn't be
+ *             corrected.
+ */
+#define __asm_copy_from_user_64bit_rapf_loop(to, from, ret, n, id)     \
+       __asm_copy_user_64bit_rapf_loop(to, from, ret, n, id,           \
+               "SUB    %1, %1, #8\n")
+
+/*     rewind 'from' pointer when a fault occurs
+ *
+ *     Rationale:
+ *             A fault occurs while reading from user buffer, which is the
+ *             source. Since the fault is at a single address, we only
+ *             need to rewind by 4 bytes.
+ *             Since we don't write to kernel buffer until we read first,
+ *             the kernel buffer is at the right state and needn't be
+ *             corrected.
+ */
+#define __asm_copy_from_user_32bit_rapf_loop(to, from, ret, n, id)     \
+       __asm_copy_user_32bit_rapf_loop(to, from, ret, n, id,           \
+               "SUB    %1, %1, #4\n")
+
+
+/* Copy from user to kernel, zeroing the bytes that were inaccessible in
+   userland.  The return-value is the number of bytes that were
+   inaccessible.  */
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+                                 unsigned long n)
+{
+       register char *dst asm ("A0.2") = pdst;
+       register const char __user *src asm ("A1.2") = psrc;
+       unsigned long retn = 0;
+
+       if (n == 0)
+               return 0;
+
+       if ((unsigned long) src & 1) {
+               __asm_copy_from_user_1(dst, src, retn);
+               n--;
+       }
+       if ((unsigned long) dst & 1) {
+               /* Worst case - byte copy */
+               while (n > 0) {
+                       __asm_copy_from_user_1(dst, src, retn);
+                       n--;
+                       if (retn)
+                               goto copy_exception_bytes;
+               }
+       }
+       if (((unsigned long) src & 2) && n >= 2) {
+               __asm_copy_from_user_2(dst, src, retn);
+               n -= 2;
+       }
+       if ((unsigned long) dst & 2) {
+               /* Second worst case - word copy */
+               while (n >= 2) {
+                       __asm_copy_from_user_2(dst, src, retn);
+                       n -= 2;
+                       if (retn)
+                               goto copy_exception_bytes;
+               }
+       }
+
+       /* We only need one check after the unalignment-adjustments,
+          because if both adjustments were done, either both or
+          neither reference had an exception.  */
+       if (retn != 0)
+               goto copy_exception_bytes;
+
+#ifdef USE_RAPF
+       /* 64 bit copy loop */
+       if (!(((unsigned long) src | (unsigned long) dst) & 7)) {
+               if (n >= RAPF_MIN_BUF_SIZE) {
+                       /* Copy using fast 64bit rapf */
+                       __asm_copy_from_user_64bit_rapf_loop(dst, src, retn,
+                                                       n, "64cuz");
+               }
+               while (n >= 8) {
+                       __asm_copy_from_user_8x64(dst, src, retn);
+                       n -= 8;
+                       if (retn)
+                               goto copy_exception_bytes;
+               }
+       }
+
+       if (n >= RAPF_MIN_BUF_SIZE) {
+               /* Copy using fast 32bit rapf */
+               __asm_copy_from_user_32bit_rapf_loop(dst, src, retn,
+                                               n, "32cuz");
+       }
+#else
+       /* 64 bit copy loop */
+       if (!(((unsigned long) src | (unsigned long) dst) & 7)) {
+               while (n >= 8) {
+                       __asm_copy_from_user_8x64(dst, src, retn);
+                       n -= 8;
+                       if (retn)
+                               goto copy_exception_bytes;
+               }
+       }
+#endif
+
+       while (n >= 4) {
+               __asm_copy_from_user_4(dst, src, retn);
+               n -= 4;
+
+               if (retn)
+                       goto copy_exception_bytes;
+       }
+
+       /* If we get here, there were no memory read faults.  */
+       switch (n) {
+               /* These copies are at least "naturally aligned" (so we don't
+                  have to check each byte), due to the src alignment code.
+                  The *_3 case *will* get the correct count for retn.  */
+       case 0:
+               /* This case deliberately left in (if you have doubts check the
+                  generated assembly code).  */
+               break;
+       case 1:
+               __asm_copy_from_user_1(dst, src, retn);
+               break;
+       case 2:
+               __asm_copy_from_user_2(dst, src, retn);
+               break;
+       case 3:
+               __asm_copy_from_user_3(dst, src, retn);
+               break;
+       }
+
+       /* If we get here, retn correctly reflects the number of failing
+          bytes.  */
+       return retn;
+
+ copy_exception_bytes:
+       /* We already have "retn" bytes cleared, and need to clear the
+          remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
+          memset is preferred here, since this isn't speed-critical code and
+          we'd rather have this be a leaf function than call memset.  */
+       {
+               char *endp;
+               for (endp = dst + n; dst < endp; dst++)
+                       *dst = 0;
+       }
+
+       return retn + n;
+}
+EXPORT_SYMBOL(__copy_user_zeroing);
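
For readers not familiar with the uaccess conventions, here is a minimal plain-C sketch of the contract implemented by __copy_user_zeroing() above; the helper name and the byte-at-a-time loop are illustrative only, not part of the patch:

/*
 * Hedged reference model of __copy_user_zeroing(): copy n bytes from
 * user space, zero-fill whatever could not be copied, and return the
 * number of bytes that were left uncopied (0 on full success).
 */
static unsigned long copy_user_zeroing_model(void *pdst,
                                             const void __user *psrc,
                                             unsigned long n)
{
        unsigned char *dst = pdst;
        unsigned long copied = 0;
        unsigned char c;

        while (copied < n) {
                if (__get_user(c, (const unsigned char __user *)psrc + copied))
                        break;                  /* fault in user space */
                dst[copied++] = c;
        }
        memset(dst + copied, 0, n - copied);    /* zero the unreadable tail */
        return n - copied;                      /* bytes not copied */
}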
+
+#define __asm_clear_8x64(to, ret) \
+       asm volatile (                                  \
+               "       MOV  D0Ar2,#0\n"                \
+               "       MOV  D1Ar1,#0\n"                \
+               "       SETL [%0],D0Ar2,D1Ar1\n"        \
+               "2:     SETL [%0++],D0Ar2,D1Ar1\n"      \
+               "1:\n"                                  \
+               "       .section .fixup,\"ax\"\n"       \
+               "3:     ADD  %1,%1,#8\n"                \
+               "       MOVT    D0Ar2,#HI(1b)\n"        \
+               "       JUMP    D0Ar2,#LO(1b)\n"        \
+               "       .previous\n"                    \
+               "       .section __ex_table,\"a\"\n"    \
+               "       .long 2b,3b\n"                  \
+               "       .previous\n"                    \
+               : "=r" (to), "=r" (ret) \
+               : "0" (to), "1" (ret)   \
+               : "D1Ar1", "D0Ar2", "memory")
+
+/* Zero userspace.  */
+
+#define __asm_clear(to, ret, CLEAR, FIXUP, TENTRY) \
+       asm volatile (                                  \
+               "       MOV D1Ar1,#0\n"                 \
+                       CLEAR                           \
+               "1:\n"                                  \
+               "       .section .fixup,\"ax\"\n"       \
+                       FIXUP                           \
+               "       MOVT    D1Ar1,#HI(1b)\n"        \
+               "       JUMP    D1Ar1,#LO(1b)\n"        \
+               "       .previous\n"                    \
+               "       .section __ex_table,\"a\"\n"    \
+                       TENTRY                          \
+               "       .previous"                      \
+               : "=r" (to), "=r" (ret)                 \
+               : "0" (to), "1" (ret)                   \
+               : "D1Ar1", "memory")
+
+#define __asm_clear_1(to, ret) \
+       __asm_clear(to, ret,                    \
+               "       SETB [%0],D1Ar1\n"      \
+               "2:     SETB [%0++],D1Ar1\n",   \
+               "3:     ADD  %1,%1,#1\n",       \
+               "       .long 2b,3b\n")
+
+#define __asm_clear_2(to, ret) \
+       __asm_clear(to, ret,                    \
+               "       SETW [%0],D1Ar1\n"      \
+               "2:     SETW [%0++],D1Ar1\n",   \
+               "3:     ADD  %1,%1,#2\n",       \
+               "       .long 2b,3b\n")
+
+#define __asm_clear_3(to, ret) \
+       __asm_clear(to, ret,                    \
+                "2:    SETW [%0++],D1Ar1\n"    \
+                "      SETB [%0],D1Ar1\n"      \
+                "3:    SETB [%0++],D1Ar1\n",   \
+                "4:    ADD  %1,%1,#2\n"        \
+                "5:    ADD  %1,%1,#1\n",       \
+                "      .long 2b,4b\n"          \
+                "      .long 3b,5b\n")
+
+#define __asm_clear_4x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+       __asm_clear(to, ret,                            \
+               "       SETD [%0],D1Ar1\n"              \
+               "2:     SETD [%0++],D1Ar1\n" CLEAR,     \
+               "3:     ADD  %1,%1,#4\n" FIXUP,         \
+               "       .long 2b,3b\n" TENTRY)
+
+#define __asm_clear_4(to, ret) \
+       __asm_clear_4x_cont(to, ret, "", "", "")
+
+#define __asm_clear_8x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+       __asm_clear_4x_cont(to, ret,                    \
+               "       SETD [%0],D1Ar1\n"              \
+               "4:     SETD [%0++],D1Ar1\n" CLEAR,     \
+               "5:     ADD  %1,%1,#4\n" FIXUP,         \
+               "       .long 4b,5b\n" TENTRY)
+
+#define __asm_clear_8(to, ret) \
+       __asm_clear_8x_cont(to, ret, "", "", "")
+
+#define __asm_clear_12x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+       __asm_clear_8x_cont(to, ret,                    \
+               "       SETD [%0],D1Ar1\n"              \
+               "6:     SETD [%0++],D1Ar1\n" CLEAR,     \
+               "7:     ADD  %1,%1,#4\n" FIXUP,         \
+               "       .long 6b,7b\n" TENTRY)
+
+#define __asm_clear_12(to, ret) \
+       __asm_clear_12x_cont(to, ret, "", "", "")
+
+#define __asm_clear_16x_cont(to, ret, CLEAR, FIXUP, TENTRY) \
+       __asm_clear_12x_cont(to, ret,                   \
+               "       SETD [%0],D1Ar1\n"              \
+               "8:     SETD [%0++],D1Ar1\n" CLEAR,     \
+               "9:     ADD  %1,%1,#4\n" FIXUP,         \
+               "       .long 8b,9b\n" TENTRY)
+
+#define __asm_clear_16(to, ret) \
+       __asm_clear_16x_cont(to, ret, "", "", "")
+
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
+{
+       register char __user *dst asm ("D0Re0") = pto;
+       register unsigned long n asm ("D1Re0") = pn;
+       register unsigned long retn asm ("D0Ar6") = 0;
+
+       if ((unsigned long) dst & 1) {
+               __asm_clear_1(dst, retn);
+               n--;
+       }
+
+       if ((unsigned long) dst & 2) {
+               __asm_clear_2(dst, retn);
+               n -= 2;
+       }
+
+       /* 64 bit copy loop */
+       if (!((__force unsigned long) dst & 7)) {
+               while (n >= 8) {
+                       __asm_clear_8x64(dst, retn);
+                       n -= 8;
+               }
+       }
+
+       while (n >= 16) {
+               __asm_clear_16(dst, retn);
+               n -= 16;
+       }
+
+       while (n >= 4) {
+               __asm_clear_4(dst, retn);
+               n -= 4;
+       }
+
+       switch (n) {
+       case 0:
+               break;
+       case 1:
+               __asm_clear_1(dst, retn);
+               break;
+       case 2:
+               __asm_clear_2(dst, retn);
+               break;
+       case 3:
+               __asm_clear_3(dst, retn);
+               break;
+       }
+
+       return retn;
+}
+EXPORT_SYMBOL(__do_clear_user);
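
__do_clear_user() is exported for use by the uaccess headers, which are not part of this hunk; a hedged sketch of the kind of wrapper that typically sits on top of it:

/*
 * Hedged usage sketch, assuming the usual clear_user() pattern: verify
 * the range is writable, then let __do_clear_user() above do the work.
 * The return value is the number of bytes that could NOT be cleared.
 */
static inline unsigned long clear_user_sketch(void __user *to, unsigned long n)
{
        if (access_ok(VERIFY_WRITE, to, n))
                return __do_clear_user(to, n);
        return n;
}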
+
+unsigned char __get_user_asm_b(const void __user *addr, long *err)
+{
+       register unsigned char x asm ("D0Re0") = 0;
+       asm volatile (
+               "       GETB %0,[%2]\n"
+               "1:\n"
+               "       GETB %0,[%2]\n"
+               "2:\n"
+               "       .section .fixup,\"ax\"\n"
+               "3:     MOV     D0FrT,%3\n"
+               "       SETD    [%1],D0FrT\n"
+               "       MOVT    D0FrT,#HI(2b)\n"
+               "       JUMP    D0FrT,#LO(2b)\n"
+               "       .previous\n"
+               "       .section __ex_table,\"a\"\n"
+               "       .long 1b,3b\n"
+               "       .previous\n"
+               : "=r" (x)
+               : "r" (err), "r" (addr), "P" (-EFAULT)
+               : "D0FrT");
+       return x;
+}
+EXPORT_SYMBOL(__get_user_asm_b);
+
+unsigned short __get_user_asm_w(const void __user *addr, long *err)
+{
+       register unsigned short x asm ("D0Re0") = 0;
+       asm volatile (
+               "       GETW %0,[%2]\n"
+               "1:\n"
+               "       GETW %0,[%2]\n"
+               "2:\n"
+               "       .section .fixup,\"ax\"\n"
+               "3:     MOV     D0FrT,%3\n"
+               "       SETD    [%1],D0FrT\n"
+               "       MOVT    D0FrT,#HI(2b)\n"
+               "       JUMP    D0FrT,#LO(2b)\n"
+               "       .previous\n"
+               "       .section __ex_table,\"a\"\n"
+               "       .long 1b,3b\n"
+               "       .previous\n"
+               : "=r" (x)
+               : "r" (err), "r" (addr), "P" (-EFAULT)
+               : "D0FrT");
+       return x;
+}
+EXPORT_SYMBOL(__get_user_asm_w);
+
+unsigned int __get_user_asm_d(const void __user *addr, long *err)
+{
+       register unsigned int x asm ("D0Re0") = 0;
+       asm volatile (
+               "       GETD %0,[%2]\n"
+               "1:\n"
+               "       GETD %0,[%2]\n"
+               "2:\n"
+               "       .section .fixup,\"ax\"\n"
+               "3:     MOV     D0FrT,%3\n"
+               "       SETD    [%1],D0FrT\n"
+               "       MOVT    D0FrT,#HI(2b)\n"
+               "       JUMP    D0FrT,#LO(2b)\n"
+               "       .previous\n"
+               "       .section __ex_table,\"a\"\n"
+               "       .long 1b,3b\n"
+               "       .previous\n"
+               : "=r" (x)
+               : "r" (err), "r" (addr), "P" (-EFAULT)
+               : "D0FrT");
+       return x;
+}
+EXPORT_SYMBOL(__get_user_asm_d);
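
These three helpers are the size-specific back ends for the architecture's __get_user(); a hedged sketch of the dispatch they support (the real macro lives in the metag uaccess header, outside this hunk, so the name and exact shape below are illustrative):

/* Hedged sketch only: dispatch on access size to the helpers above. */
#define get_user_sketch(x, ptr, err)                                    \
        do {                                                            \
                switch (sizeof(*(ptr))) {                               \
                case 1: (x) = __get_user_asm_b((ptr), &(err)); break;   \
                case 2: (x) = __get_user_asm_w((ptr), &(err)); break;   \
                case 4: (x) = __get_user_asm_d((ptr), &(err)); break;   \
                default: (err) = -EFAULT; break;                        \
                }                                                       \
        } while (0)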
+
+long __put_user_asm_b(unsigned int x, void __user *addr)
+{
+       register unsigned int err asm ("D0Re0") = 0;
+       asm volatile (
+               "       MOV  %0,#0\n"
+               "       SETB [%2],%1\n"
+               "1:\n"
+               "       SETB [%2],%1\n"
+               "2:\n"
+               ".section .fixup,\"ax\"\n"
+               "3:     MOV     %0,%3\n"
+               "       MOVT    D0FrT,#HI(2b)\n"
+               "       JUMP    D0FrT,#LO(2b)\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .long 1b,3b\n"
+               ".previous"
+               : "=r"(err)
+               : "d" (x), "a" (addr), "P"(-EFAULT)
+               : "D0FrT");
+       return err;
+}
+EXPORT_SYMBOL(__put_user_asm_b);
+
+long __put_user_asm_w(unsigned int x, void __user *addr)
+{
+       register unsigned int err asm ("D0Re0") = 0;
+       asm volatile (
+               "       MOV  %0,#0\n"
+               "       SETW [%2],%1\n"
+               "1:\n"
+               "       SETW [%2],%1\n"
+               "2:\n"
+               ".section .fixup,\"ax\"\n"
+               "3:     MOV     %0,%3\n"
+               "       MOVT    D0FrT,#HI(2b)\n"
+               "       JUMP    D0FrT,#LO(2b)\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .long 1b,3b\n"
+               ".previous"
+               : "=r"(err)
+               : "d" (x), "a" (addr), "P"(-EFAULT)
+               : "D0FrT");
+       return err;
+}
+EXPORT_SYMBOL(__put_user_asm_w);
+
+long __put_user_asm_d(unsigned int x, void __user *addr)
+{
+       register unsigned int err asm ("D0Re0") = 0;
+       asm volatile (
+               "       MOV  %0,#0\n"
+               "       SETD [%2],%1\n"
+               "1:\n"
+               "       SETD [%2],%1\n"
+               "2:\n"
+               ".section .fixup,\"ax\"\n"
+               "3:     MOV     %0,%3\n"
+               "       MOVT    D0FrT,#HI(2b)\n"
+               "       JUMP    D0FrT,#LO(2b)\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .long 1b,3b\n"
+               ".previous"
+               : "=r"(err)
+               : "d" (x), "a" (addr), "P"(-EFAULT)
+               : "D0FrT");
+       return err;
+}
+EXPORT_SYMBOL(__put_user_asm_d);
+
+long __put_user_asm_l(unsigned long long x, void __user *addr)
+{
+       register unsigned int err asm ("D0Re0") = 0;
+       asm volatile (
+               "       MOV  %0,#0\n"
+               "       SETL [%2],%1,%t1\n"
+               "1:\n"
+               "       SETL [%2],%1,%t1\n"
+               "2:\n"
+               ".section .fixup,\"ax\"\n"
+               "3:     MOV     %0,%3\n"
+               "       MOVT    D0FrT,#HI(2b)\n"
+               "       JUMP    D0FrT,#LO(2b)\n"
+               ".previous\n"
+               ".section __ex_table,\"a\"\n"
+               "       .long 1b,3b\n"
+               ".previous"
+               : "=r"(err)
+               : "d" (x), "a" (addr), "P"(-EFAULT)
+               : "D0FrT");
+       return err;
+}
+EXPORT_SYMBOL(__put_user_asm_l);
+
+long strnlen_user(const char __user *src, long count)
+{
+       long res;
+
+       if (!access_ok(VERIFY_READ, src, 0))
+               return 0;
+
+       asm volatile (" MOV     D0Ar4, %1\n"
+                     " MOV     D0Ar6, %2\n"
+                     "0:\n"
+                     " SUBS    D0FrT, D0Ar6, #0\n"
+                     " SUB     D0Ar6, D0Ar6, #1\n"
+                     " BLE     2f\n"
+                     " GETB    D0FrT, [D0Ar4+#1++]\n"
+                     "1:\n"
+                     " TST     D0FrT, #255\n"
+                     " BNE     0b\n"
+                     "2:\n"
+                     " SUB     %0, %2, D0Ar6\n"
+                     "3:\n"
+                     " .section .fixup,\"ax\"\n"
+                     "4:\n"
+                     " MOV     %0, #0\n"
+                     " MOVT    D0FrT,#HI(3b)\n"
+                     " JUMP    D0FrT,#LO(3b)\n"
+                     " .previous\n"
+                     " .section __ex_table,\"a\"\n"
+                     " .long 1b,4b\n"
+                     " .previous\n"
+                     : "=r" (res)
+                     : "r" (src), "r" (count)
+                     : "D0FrT", "D0Ar4", "D0Ar6", "cc");
+
+       return res;
+}
+EXPORT_SYMBOL(strnlen_user);
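
The assembly above is easier to follow against a plain-C model of its observable behaviour (a hedged sketch; the helper name is illustrative):

/*
 * Hedged reference model of strnlen_user() above: returns the string
 * length including the terminating NUL, a value larger than 'count'
 * if no NUL is found within 'count' bytes, and 0 on an access failure.
 */
static long strnlen_user_model(const char __user *src, long count)
{
        long len = 0;
        char c;

        if (!access_ok(VERIFY_READ, src, 0))
                return 0;
        while (len < count) {
                if (__get_user(c, src + len))
                        return 0;               /* fault while reading */
                len++;
                if (c == '\0')
                        return len;             /* length includes the NUL */
        }
        return count + 1;                       /* no NUL within count */
}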
+
+long __strncpy_from_user(char *dst, const char __user *src, long count)
+{
+       long res;
+
+       if (count == 0)
+               return 0;
+
+       /*
+        * Currently, in 2.4.0-test9, most ports use a simple byte-copy loop.
+        *  So do we.
+        *
+        *  This code is deduced from:
+        *
+        *      char tmp2;
+        *      long tmp1, tmp3;
+        *      tmp1 = count;
+        *      while ((*dst++ = (tmp2 = *src++)) != 0
+        *             && --tmp1)
+        *        ;
+        *
+        *      res = count - tmp1;
+        *
+        *  with tweaks.
+        */
+
+       asm volatile (" MOV  %0,%3\n"
+                     "1:\n"
+                     " GETB D0FrT,[%2++]\n"
+                     "2:\n"
+                     " CMP  D0FrT,#0\n"
+                     " SETB [%1++],D0FrT\n"
+                     " BEQ  3f\n"
+                     " SUBS %0,%0,#1\n"
+                     " BNZ  1b\n"
+                     "3:\n"
+                     " SUB  %0,%3,%0\n"
+                     "4:\n"
+                     " .section .fixup,\"ax\"\n"
+                     "5:\n"
+                     " MOV  %0,%7\n"
+                     " MOVT    D0FrT,#HI(4b)\n"
+                     " JUMP    D0FrT,#LO(4b)\n"
+                     " .previous\n"
+                     " .section __ex_table,\"a\"\n"
+                     " .long 2b,5b\n"
+                     " .previous"
+                     : "=r" (res), "=r" (dst), "=r" (src), "=r" (count)
+                     : "3" (count), "1" (dst), "2" (src), "P" (-EFAULT)
+                     : "D0FrT", "memory", "cc");
+
+       return res;
+}
+EXPORT_SYMBOL(__strncpy_from_user);
diff --git a/arch/metag/mm/Kconfig b/arch/metag/mm/Kconfig
new file mode 100644 (file)
index 0000000..cd7f2f2
--- /dev/null
@@ -0,0 +1,153 @@
+menu "Memory management options"
+
+config PAGE_OFFSET
+       hex "Kernel page offset address"
+       default "0x40000000"
+       help
+         This option allows you to set the virtual address at which the
+         kernel will be mapped.
+endmenu
+
+config KERNEL_4M_PAGES
+       bool "Map kernel with 4MB pages"
+       depends on METAG_META21_MMU
+       default y
+       help
+         Map the kernel with large pages to reduce TLB pressure.
+
+choice
+       prompt "User page size"
+       default PAGE_SIZE_4K
+
+config PAGE_SIZE_4K
+       bool "4kB"
+       help
+         This is the default page size used by all Meta cores.
+
+config PAGE_SIZE_8K
+       bool "8kB"
+       depends on METAG_META21_MMU
+       help
+         This enables 8kB pages as supported by Meta 2.x and later MMUs.
+
+config PAGE_SIZE_16K
+       bool "16kB"
+       depends on METAG_META21_MMU
+       help
+         This enables 16kB pages as supported by Meta 2.x and later MMUs.
+
+endchoice
+
+config NUMA
+       bool "Non Uniform Memory Access (NUMA) Support"
+       help
+         Some Meta systems have MMU-mappable on-chip memories with
+         lower latencies than main memory. This enables support for
+         these blocks by binding them to nodes and allowing
+         memory policies to be used for prioritizing and controlling
+         allocation behaviour.
+
+config FORCE_MAX_ZONEORDER
+       int "Maximum zone order"
+       range 10 32
+       default "10"
+       help
+         The kernel memory allocator divides physically contiguous memory
+         blocks into "zones", where each zone is a power of two number of
+         pages.  This option selects the largest power of two that the kernel
+         keeps in the memory allocator.  If you need to allocate very large
+         blocks of physically contiguous memory, then you may need to
+         increase this value.
+
+         This config option is actually maximum order plus one. For example,
+         a value of 11 means that the largest free memory block is 2^10 pages.
+
+         The page size is not necessarily 4KB.  Keep this in mind
+         when choosing a value for this option.
+
+config METAG_L2C
+       bool "Level 2 Cache Support"
+       depends on METAG_META21
+       help
+         Say Y here to enable support for the Meta Level 2 (L2) cache. This
+         will enable the cache at start-up if it hasn't already been enabled
+         by the bootloader.
+
+         If the bootloader enables the L2, you must say Y here to ensure the
+         kernel takes the appropriate actions to keep the cache coherent.
+
+config NODES_SHIFT
+       int
+       default "1"
+       depends on NEED_MULTIPLE_NODES
+
+config ARCH_FLATMEM_ENABLE
+       def_bool y
+       depends on !NUMA
+
+config ARCH_SPARSEMEM_ENABLE
+       def_bool y
+       select SPARSEMEM_STATIC
+
+config ARCH_SPARSEMEM_DEFAULT
+       def_bool y
+
+config MAX_ACTIVE_REGIONS
+       int
+       default "2" if SPARSEMEM
+       default "1"
+
+config ARCH_POPULATES_NODE_MAP
+       def_bool y
+
+config ARCH_SELECT_MEMORY_MODEL
+       def_bool y
+
+config SYS_SUPPORTS_HUGETLBFS
+       def_bool y
+       depends on METAG_META21_MMU
+
+choice
+       prompt "HugeTLB page size"
+       depends on METAG_META21_MMU && HUGETLB_PAGE
+       default HUGETLB_PAGE_SIZE_1M
+
+config HUGETLB_PAGE_SIZE_8K
+       bool "8kB"
+       depends on PAGE_SIZE_4K
+
+config HUGETLB_PAGE_SIZE_16K
+       bool "16kB"
+       depends on PAGE_SIZE_4K || PAGE_SIZE_8K
+
+config HUGETLB_PAGE_SIZE_32K
+       bool "32kB"
+
+config HUGETLB_PAGE_SIZE_64K
+       bool "64kB"
+
+config HUGETLB_PAGE_SIZE_128K
+       bool "128kB"
+
+config HUGETLB_PAGE_SIZE_256K
+       bool "256kB"
+
+config HUGETLB_PAGE_SIZE_512K
+       bool "512kB"
+
+config HUGETLB_PAGE_SIZE_1M
+       bool "1MB"
+
+config HUGETLB_PAGE_SIZE_2M
+       bool "2MB"
+
+config HUGETLB_PAGE_SIZE_4M
+       bool "4MB"
+
+endchoice
+
+config METAG_COREMEM
+       bool
+       default y if SUSPEND
+
+source "mm/Kconfig"
diff --git a/arch/metag/mm/Makefile b/arch/metag/mm/Makefile
new file mode 100644 (file)
index 0000000..9943311
--- /dev/null
@@ -0,0 +1,19 @@
+#
+# Makefile for the linux Meta-specific parts of the memory manager.
+#
+
+obj-y                          += cache.o
+obj-y                          += extable.o
+obj-y                          += fault.o
+obj-y                          += init.o
+obj-y                          += ioremap.o
+obj-y                          += maccess.o
+
+mmu-y                          := mmu-meta1.o
+mmu-$(CONFIG_METAG_META21_MMU) := mmu-meta2.o
+obj-y                          += $(mmu-y)
+
+obj-$(CONFIG_HIGHMEM)          += highmem.o
+obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
+obj-$(CONFIG_METAG_L2C)                += l2cache.o
+obj-$(CONFIG_NUMA)             += numa.o
diff --git a/arch/metag/mm/cache.c b/arch/metag/mm/cache.c
new file mode 100644 (file)
index 0000000..b5d3b2e
--- /dev/null
@@ -0,0 +1,521 @@
+/*
+ * arch/metag/mm/cache.c
+ *
+ * Copyright (C) 2001, 2002, 2005, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Cache control code
+ */
+
+#include <linux/export.h>
+#include <linux/io.h>
+#include <asm/cacheflush.h>
+#include <asm/core_reg.h>
+#include <asm/global_lock.h>
+#include <asm/metag_isa.h>
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+
+#define DEFAULT_CACHE_WAYS_LOG2        2
+
+/*
+ * Size of a set in the caches. Initialised for default 16K stride, adjusted
+ * according to values passed through TBI global heap segment via LDLK (on ATP)
+ * or config registers (on HTP/MTP)
+ */
+static int dcache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
+                                       - DEFAULT_CACHE_WAYS_LOG2;
+static int icache_set_shift = METAG_TBI_CACHE_SIZE_BASE_LOG2
+                                       - DEFAULT_CACHE_WAYS_LOG2;
+/*
+ * The number of sets in the caches. Initialised for HTP/ATP, adjusted
+ * according to NOMMU setting in config registers
+ */
+static unsigned char dcache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
+static unsigned char icache_sets_log2 = DEFAULT_CACHE_WAYS_LOG2;
+
+#ifndef CONFIG_METAG_META12
+/**
+ * metag_lnkget_probe() - Probe whether lnkget/lnkset go around the cache
+ */
+static volatile u32 lnkget_testdata[16] __initdata __aligned(64);
+
+#define LNKGET_CONSTANT 0xdeadbeef
+
+void __init metag_lnkget_probe(void)
+{
+       int temp;
+       long flags;
+
+       /*
+        * It's conceivable the user has configured a globally coherent cache
+        * shared with non-Linux hardware threads, so use LOCK2 to prevent them
+        * from executing and causing cache eviction during the test.
+        */
+       __global_lock2(flags);
+
+       /* read a value to bring it into the cache */
+       (void)lnkget_testdata[0];
+       lnkget_testdata[0] = 0;
+
+       /* lnkget/lnkset it to modify it */
+       asm volatile(
+               "1:     LNKGETD %0, [%1]\n"
+               "       LNKSETD [%1], %2\n"
+               "       DEFR    %0, TXSTAT\n"
+               "       ANDT    %0, %0, #HI(0x3f000000)\n"
+               "       CMPT    %0, #HI(0x02000000)\n"
+               "       BNZ     1b\n"
+               : "=&d" (temp)
+               : "da" (&lnkget_testdata[0]), "bd" (LNKGET_CONSTANT)
+               : "cc");
+
+       /* re-read it to see if the cached value changed */
+       temp = lnkget_testdata[0];
+
+       __global_unlock2(flags);
+
+       /* flush the cache line to fix any incoherency */
+       __builtin_dcache_flush((void *)&lnkget_testdata[0]);
+
+#if defined(CONFIG_METAG_LNKGET_AROUND_CACHE)
+       /* if the cache is right, LNKGET_AROUND_CACHE is unnecessary */
+       if (temp == LNKGET_CONSTANT)
+               pr_info("LNKGET/SET go through cache but CONFIG_METAG_LNKGET_AROUND_CACHE=y\n");
+#elif defined(CONFIG_METAG_ATOMICITY_LNKGET)
+       /*
+        * if the cache is wrong, LNKGET_AROUND_CACHE is really necessary
+        * because the kernel is configured to use LNKGET/SET for atomicity
+        */
+       WARN(temp != LNKGET_CONSTANT,
+            "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
+            "Expect kernel failure as it's used for atomicity primitives\n");
+#elif defined(CONFIG_SMP)
+       /*
+        * if the cache is wrong, LNKGET_AROUND_CACHE should be used or the
+        * gateway page won't flush and userland could break.
+        */
+       WARN(temp != LNKGET_CONSTANT,
+            "LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n"
+            "Expect userland failure as it's used for user gateway page\n");
+#else
+       /*
+        * if the cache is wrong, LNKGET_AROUND_CACHE is set wrong, but it
+        * doesn't actually matter as it doesn't have any effect on !SMP &&
+        * !ATOMICITY_LNKGET.
+        */
+       if (temp != LNKGET_CONSTANT)
+               pr_warn("LNKGET/SET go around cache but CONFIG_METAG_LNKGET_AROUND_CACHE=n\n");
+#endif
+}
+#endif /* !CONFIG_METAG_META12 */
+
+/**
+ * metag_cache_probe() - Probe L1 cache configuration.
+ *
+ * Probe the L1 cache configuration to aid the L1 physical cache flushing
+ * functions.
+ */
+void __init metag_cache_probe(void)
+{
+#ifndef CONFIG_METAG_META12
+       int coreid = metag_in32(METAC_CORE_ID);
+       int config = metag_in32(METAC_CORE_CONFIG2);
+       int cfgcache = coreid & METAC_COREID_CFGCACHE_BITS;
+
+       if (cfgcache == METAC_COREID_CFGCACHE_TYPE0 ||
+           cfgcache == METAC_COREID_CFGCACHE_PRIVNOMMU) {
+               icache_sets_log2 = 1;
+               dcache_sets_log2 = 1;
+       }
+
+       /* For normal size caches, the smallest size is 4KB.
+          For small caches, the smallest size is 64 bytes. */
+       icache_set_shift = (config & METAC_CORECFG2_ICSMALL_BIT)
+                               ? 6 : 12;
+       icache_set_shift += (config & METAC_CORE_C2ICSZ_BITS)
+                               >> METAC_CORE_C2ICSZ_S;
+       icache_set_shift -= icache_sets_log2;
+
+       dcache_set_shift = (config & METAC_CORECFG2_DCSMALL_BIT)
+                               ? 6 : 12;
+       dcache_set_shift += (config & METAC_CORECFG2_DCSZ_BITS)
+                               >> METAC_CORECFG2_DCSZ_S;
+       dcache_set_shift -= dcache_sets_log2;
+
+       metag_lnkget_probe();
+#else
+       /* Extract cache sizes from global heap segment */
+       unsigned long val, u;
+       int width, shift, addend;
+       PTBISEG seg;
+
+       seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL,
+                                         TBID_SEGSCOPE_GLOBAL,
+                                         TBID_SEGTYPE_HEAP));
+       if (seg != NULL) {
+               val = seg->Data[1];
+
+               /* Work out width of I-cache size bit-field */
+               u = ((unsigned long) METAG_TBI_ICACHE_SIZE_BITS)
+                      >> METAG_TBI_ICACHE_SIZE_S;
+               width = 0;
+               while (u & 1) {
+                       width++;
+                       u >>= 1;
+               }
+               /* Extract sign-extended size addend value */
+               shift = 32 - (METAG_TBI_ICACHE_SIZE_S + width);
+               addend = (long) ((val & METAG_TBI_ICACHE_SIZE_BITS)
+                                << shift)
+                       >> (shift + METAG_TBI_ICACHE_SIZE_S);
+               /* Now calculate I-cache set size */
+               icache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
+                                   - DEFAULT_CACHE_WAYS_LOG2)
+                                       + addend;
+
+               /* Similarly for D-cache */
+               u = ((unsigned long) METAG_TBI_DCACHE_SIZE_BITS)
+                      >> METAG_TBI_DCACHE_SIZE_S;
+               width = 0;
+               while (u & 1) {
+                       width++;
+                       u >>= 1;
+               }
+               shift = 32 - (METAG_TBI_DCACHE_SIZE_S + width);
+               addend = (long) ((val & METAG_TBI_DCACHE_SIZE_BITS)
+                                << shift)
+                       >> (shift + METAG_TBI_DCACHE_SIZE_S);
+               dcache_set_shift = (METAG_TBI_CACHE_SIZE_BASE_LOG2
+                                   - DEFAULT_CACHE_WAYS_LOG2)
+                                       + addend;
+       }
+#endif
+}
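
The Meta 1 branch above recovers a signed size addend from a bit-field in the TBI heap segment by shifting it to the top of the word and arithmetic-shifting it back down; a hedged generic form of that idiom (field position and width are parameters here, not the real register layout):

/*
 * Hedged illustration of the sign-extension idiom used above: place the
 * field's top bit at bit 31, then arithmetic-shift back down so the sign
 * bit is propagated into the result.
 */
static inline int sign_extend_field(u32 val, unsigned int shift,
                                    unsigned int width)
{
        return (s32)(val << (32 - shift - width)) >> (32 - width);
}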
+
+static void metag_phys_data_cache_flush(const void *start)
+{
+       unsigned long flush0, flush1, flush2, flush3;
+       int loops, step;
+       int thread;
+       int part, offset;
+       int set_shift;
+
+       /* Use a sequence of writes to flush the cache region requested */
+       thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
+                                         >> TXENABLE_THREAD_S;
+
+       /* Cache is broken into sets which lie in contiguous RAMs */
+       set_shift = dcache_set_shift;
+
+       /* Move to the base of the physical cache flush region */
+       flush0 = LINSYSCFLUSH_DCACHE_LINE;
+       step   = 64;
+
+       /* Get partition data for this thread */
+       part = metag_in32(SYSC_DCPART0 +
+                             (SYSC_xCPARTn_STRIDE * thread));
+
+       if ((int)start < 0)
+               /* Access Global vs Local partition */
+               part >>= SYSC_xCPARTG_AND_S
+                       - SYSC_xCPARTL_AND_S;
+
+       /* Extract offset and move SetOff */
+       offset = (part & SYSC_xCPARTL_OR_BITS)
+                       >> SYSC_xCPARTL_OR_S;
+       flush0 += (offset << (set_shift - 4));
+
+       /* Shrink size */
+       part = (part & SYSC_xCPARTL_AND_BITS)
+                       >> SYSC_xCPARTL_AND_S;
+       loops = ((part + 1) << (set_shift - 4));
+
+       /* Reduce loops by step of cache line size */
+       loops /= step;
+
+       flush1 = flush0 + (1 << set_shift);
+       flush2 = flush0 + (2 << set_shift);
+       flush3 = flush0 + (3 << set_shift);
+
+       if (dcache_sets_log2 == 1) {
+               flush2 = flush1;
+               flush3 = flush1 + step;
+               flush1 = flush0 + step;
+               step  <<= 1;
+               loops >>= 1;
+       }
+
+       /* Clear loops ways in cache */
+       while (loops-- != 0) {
+               /* Clear the ways. */
+#if 0
+               /*
+                * GCC doesn't generate very good code for this so we
+                * provide inline assembly instead.
+                */
+               metag_out8(0, flush0);
+               metag_out8(0, flush1);
+               metag_out8(0, flush2);
+               metag_out8(0, flush3);
+
+               flush0 += step;
+               flush1 += step;
+               flush2 += step;
+               flush3 += step;
+#else
+               asm volatile (
+                       "SETB\t[%0+%4++],%5\n"
+                       "SETB\t[%1+%4++],%5\n"
+                       "SETB\t[%2+%4++],%5\n"
+                       "SETB\t[%3+%4++],%5\n"
+                       : "+e" (flush0),
+                         "+e" (flush1),
+                         "+e" (flush2),
+                         "+e" (flush3)
+                       : "e" (step), "a" (0));
+#endif
+       }
+}
+
+void metag_data_cache_flush_all(const void *start)
+{
+       if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
+               /* No need to flush the data cache; it's not actually enabled */
+               return;
+
+       metag_phys_data_cache_flush(start);
+}
+
+void metag_data_cache_flush(const void *start, int bytes)
+{
+       unsigned long flush0;
+       int loops, step;
+
+       if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_DC_ON_BIT) == 0)
+               /* No need to flush the data cache; it's not actually enabled */
+               return;
+
+       if (bytes >= 4096) {
+               metag_phys_data_cache_flush(start);
+               return;
+       }
+
+       /* Use linear cache flush mechanism on META IP */
+       flush0 = (int)start;
+       loops  = ((int)start & (DCACHE_LINE_BYTES - 1)) + bytes +
+                                       (DCACHE_LINE_BYTES - 1);
+       loops  >>= DCACHE_LINE_S;
+
+#define PRIM_FLUSH(addr, offset) do {                  \
+       int __addr = ((int) (addr)) + ((offset) * 64);  \
+       __builtin_dcache_flush((void *)(__addr));       \
+       } while (0)
+
+#define LOOP_INC (4*64)
+
+       do {
+               /* By default stop */
+               step = 0;
+
+               switch (loops) {
+               /* Drop Thru Cases! */
+               default:
+                       PRIM_FLUSH(flush0, 3);
+                       loops -= 4;
+                       step = 1;
+               case 3:
+                       PRIM_FLUSH(flush0, 2);
+               case 2:
+                       PRIM_FLUSH(flush0, 1);
+               case 1:
+                       PRIM_FLUSH(flush0, 0);
+                       flush0 += LOOP_INC;
+               case 0:
+                       break;
+               }
+       } while (step);
+}
+EXPORT_SYMBOL(metag_data_cache_flush);
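
The fall-through switch above flushes up to four cache lines per pass; functionally it is equivalent to a simple per-line loop over the requested range. A hedged, purely illustrative rewrite (the real code keeps the unrolled form for the sake of the generated code):

/* Hedged equivalent of the unrolled linear-flush loop above. */
static inline void dcache_flush_range_sketch(const void *start, int bytes)
{
        unsigned long addr = (unsigned long)start & ~(DCACHE_LINE_BYTES - 1);
        unsigned long end = (unsigned long)start + bytes;

        for (; addr < end; addr += DCACHE_LINE_BYTES)
                __builtin_dcache_flush((void *)addr);
}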
+
+static void metag_phys_code_cache_flush(const void *start, int bytes)
+{
+       unsigned long flush0, flush1, flush2, flush3, end_set;
+       int loops, step;
+       int thread;
+       int set_shift, set_size;
+       int part, offset;
+
+       /* Use a sequence of writes to flush the cache region requested */
+       thread = (__core_reg_get(TXENABLE) & TXENABLE_THREAD_BITS)
+                                         >> TXENABLE_THREAD_S;
+       set_shift = icache_set_shift;
+
+       /* Move to the base of the physical cache flush region */
+       flush0 = LINSYSCFLUSH_ICACHE_LINE;
+       step   = 64;
+
+       /* Get partition code for this thread */
+       part = metag_in32(SYSC_ICPART0 +
+                         (SYSC_xCPARTn_STRIDE * thread));
+
+       if ((int)start < 0)
+               /* Access Global vs Local partition */
+               part >>= SYSC_xCPARTG_AND_S-SYSC_xCPARTL_AND_S;
+
+       /* Extract offset and move SetOff */
+       offset = (part & SYSC_xCPARTL_OR_BITS)
+                       >> SYSC_xCPARTL_OR_S;
+       flush0 += (offset << (set_shift - 4));
+
+       /* Shrink size */
+       part = (part & SYSC_xCPARTL_AND_BITS)
+                       >> SYSC_xCPARTL_AND_S;
+       loops = ((part + 1) << (set_shift - 4));
+
+       /* Where does the Set end? */
+       end_set = flush0 + loops;
+       set_size = loops;
+
+#ifdef CONFIG_METAG_META12
+       if ((bytes < 4096) && (bytes < loops)) {
+               /* Unreachable on HTP/MTP */
+               /* Only target the sets that could be relevant */
+               flush0 += (loops - step) & ((int) start);
+               loops = (((int) start) & (step-1)) + bytes + step - 1;
+       }
+#endif
+
+       /* Reduce loops by step of cache line size */
+       loops /= step;
+
+       flush1 = flush0 + (1<<set_shift);
+       flush2 = flush0 + (2<<set_shift);
+       flush3 = flush0 + (3<<set_shift);
+
+       if (icache_sets_log2 == 1) {
+               flush2 = flush1;
+               flush3 = flush1 + step;
+               flush1 = flush0 + step;
+#if 0
+               /* flush0 will stop one line early in this case
+                * (flush1 will do the final line).
+                * However we don't correct end_set here at the moment
+                * because it will never wrap on HTP/MTP
+                */
+               end_set -= step;
+#endif
+               step  <<= 1;
+               loops >>= 1;
+       }
+
+       /* Clear loops ways in cache */
+       while (loops-- != 0) {
+#if 0
+               /*
+                * GCC doesn't generate very good code for this so we
+                * provide inline assembly instead.
+                */
+               /* Clear the ways */
+               metag_out8(0, flush0);
+               metag_out8(0, flush1);
+               metag_out8(0, flush2);
+               metag_out8(0, flush3);
+
+               flush0 += step;
+               flush1 += step;
+               flush2 += step;
+               flush3 += step;
+#else
+               asm volatile (
+                       "SETB\t[%0+%4++],%5\n"
+                       "SETB\t[%1+%4++],%5\n"
+                       "SETB\t[%2+%4++],%5\n"
+                       "SETB\t[%3+%4++],%5\n"
+                       : "+e" (flush0),
+                         "+e" (flush1),
+                         "+e" (flush2),
+                         "+e" (flush3)
+                       : "e" (step), "a" (0));
+#endif
+
+               if (flush0 == end_set) {
+                       /* Wrap within Set 0 */
+                       flush0 -= set_size;
+                       flush1 -= set_size;
+                       flush2 -= set_size;
+                       flush3 -= set_size;
+               }
+       }
+}
+
+void metag_code_cache_flush_all(const void *start)
+{
+       if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
+               /* No need to flush the code cache; it's not actually enabled */
+               return;
+
+       metag_phys_code_cache_flush(start, 4096);
+}
+EXPORT_SYMBOL(metag_code_cache_flush_all);
+
+void metag_code_cache_flush(const void *start, int bytes)
+{
+#ifndef CONFIG_METAG_META12
+       void *flush;
+       int loops, step;
+#endif /* !CONFIG_METAG_META12 */
+
+       if ((metag_in32(SYSC_CACHE_MMU_CONFIG) & SYSC_CMMUCFG_IC_ON_BIT) == 0)
+               /* No need to flush the code cache; it's not actually enabled */
+               return;
+
+#ifdef CONFIG_METAG_META12
+       /* CACHEWD isn't available on Meta1, so always do full cache flush */
+       metag_phys_code_cache_flush(start, bytes);
+
+#else /* CONFIG_METAG_META12 */
+       /* If large size do full physical cache flush */
+       if (bytes >= 4096) {
+               metag_phys_code_cache_flush(start, bytes);
+               return;
+       }
+
+       /* Use linear cache flush mechanism on META IP */
+       flush = (void *)((int)start & ~(ICACHE_LINE_BYTES-1));
+       loops  = ((int)start & (ICACHE_LINE_BYTES-1)) + bytes +
+               (ICACHE_LINE_BYTES-1);
+       loops  >>= ICACHE_LINE_S;
+
+#define PRIM_IFLUSH(addr, offset) \
+       __builtin_meta2_cachewd(((addr) + ((offset) * 64)), CACHEW_ICACHE_BIT)
+
+#define LOOP_INC (4*64)
+
+       do {
+               /* By default stop */
+               step = 0;
+
+               switch (loops) {
+               /* Drop Thru Cases! */
+               default:
+                       PRIM_IFLUSH(flush, 3);
+                       loops -= 4;
+                       step = 1;
+               case 3:
+                       PRIM_IFLUSH(flush, 2);
+               case 2:
+                       PRIM_IFLUSH(flush, 1);
+               case 1:
+                       PRIM_IFLUSH(flush, 0);
+                       flush += LOOP_INC;
+               case 0:
+                       break;
+               }
+       } while (step);
+#endif /* !CONFIG_METAG_META12 */
+}
+EXPORT_SYMBOL(metag_code_cache_flush);
diff --git a/arch/metag/mm/extable.c b/arch/metag/mm/extable.c
new file mode 100644 (file)
index 0000000..2a21eae
--- /dev/null
@@ -0,0 +1,15 @@
+
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+int fixup_exception(struct pt_regs *regs)
+{
+       const struct exception_table_entry *fixup;
+       unsigned long pc = instruction_pointer(regs);
+
+       fixup = search_exception_tables(pc);
+       if (fixup)
+               regs->ctx.CurrPC = fixup->fixup;
+
+       return fixup != NULL;
+}
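
fixup_exception() consumes the tables built by the ".long 1b,3b" pairs emitted throughout the usercopy code earlier in this patch. A hedged sketch of the entry layout of that era (the authoritative definition lives in the uaccess headers, not in this hunk):

/* Hedged sketch of an exception table entry as consumed above. */
struct exception_table_entry_sketch {
        unsigned long insn;     /* address of the instruction that may fault */
        unsigned long fixup;    /* address to resume at after the fault */
};

search_exception_tables(pc) looks the faulting PC up in these sorted pairs, and on a hit fixup_exception() redirects regs->ctx.CurrPC into the corresponding .fixup stub.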
diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c
new file mode 100644 (file)
index 0000000..2c75bf7
--- /dev/null
@@ -0,0 +1,239 @@
+/*
+ *  Meta page fault handling.
+ *
+ *  Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ */
+
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/interrupt.h>
+#include <linux/uaccess.h>
+
+#include <asm/tlbflush.h>
+#include <asm/mmu.h>
+#include <asm/traps.h>
+
+/* Clear any pending catch buffer state. */
+static void clear_cbuf_entry(struct pt_regs *regs, unsigned long addr,
+                            unsigned int trapno)
+{
+       PTBICTXEXTCB0 cbuf = regs->extcb0;
+
+       switch (trapno) {
+               /* Instruction fetch faults leave no catch buffer state. */
+       case TBIXXF_SIGNUM_IGF:
+       case TBIXXF_SIGNUM_IPF:
+               return;
+       default:
+               if (cbuf[0].CBAddr == addr) {
+                       cbuf[0].CBAddr = 0;
+                       cbuf[0].CBFlags &= ~TXCATCH0_FAULT_BITS;
+
+                       /* And, as this is the ONLY catch entry, we
+                        * need to clear the cbuf bit from the context!
+                        */
+                       regs->ctx.SaveMask &= ~(TBICTX_CBUF_BIT |
+                                               TBICTX_XCBF_BIT);
+
+                       return;
+               }
+               pr_err("Failed to clear cbuf entry!\n");
+       }
+}
+
+int show_unhandled_signals = 1;
+
+int do_page_fault(struct pt_regs *regs, unsigned long address,
+                 unsigned int write_access, unsigned int trapno)
+{
+       struct task_struct *tsk;
+       struct mm_struct *mm;
+       struct vm_area_struct *vma, *prev_vma;
+       siginfo_t info;
+       int fault;
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
+                               (write_access ? FAULT_FLAG_WRITE : 0);
+
+       tsk = current;
+
+       if ((address >= VMALLOC_START) && (address < VMALLOC_END)) {
+               /*
+                * Synchronize this task's top level page-table
+                * with the 'reference' page table.
+                *
+                * Do _not_ use "tsk" here. We might be inside
+                * an interrupt in the middle of a task switch.
+                */
+               int offset = pgd_index(address);
+               pgd_t *pgd, *pgd_k;
+               pud_t *pud, *pud_k;
+               pmd_t *pmd, *pmd_k;
+               pte_t *pte_k;
+
+               pgd = ((pgd_t *)mmu_get_base()) + offset;
+               pgd_k = swapper_pg_dir + offset;
+
+               /* This will never happen with the folded page table. */
+               if (!pgd_present(*pgd)) {
+                       if (!pgd_present(*pgd_k))
+                               goto bad_area_nosemaphore;
+                       set_pgd(pgd, *pgd_k);
+                       return 0;
+               }
+
+               pud = pud_offset(pgd, address);
+               pud_k = pud_offset(pgd_k, address);
+               if (!pud_present(*pud_k))
+                       goto bad_area_nosemaphore;
+               set_pud(pud, *pud_k);
+
+               pmd = pmd_offset(pud, address);
+               pmd_k = pmd_offset(pud_k, address);
+               if (!pmd_present(*pmd_k))
+                       goto bad_area_nosemaphore;
+               set_pmd(pmd, *pmd_k);
+
+               pte_k = pte_offset_kernel(pmd_k, address);
+               if (!pte_present(*pte_k))
+                       goto bad_area_nosemaphore;
+
+               /* May only be needed on Chorus2 */
+               flush_tlb_all();
+               return 0;
+       }
+
+       mm = tsk->mm;
+
+       if (in_atomic() || !mm)
+               goto no_context;
+
+retry:
+       down_read(&mm->mmap_sem);
+
+       vma = find_vma_prev(mm, address, &prev_vma);
+
+       if (!vma || address < vma->vm_start)
+               goto check_expansion;
+
+good_area:
+       if (write_access) {
+               if (!(vma->vm_flags & VM_WRITE))
+                       goto bad_area;
+       } else {
+               if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
+                       goto bad_area;
+       }
+
+       /*
+        * If for any reason at all we couldn't handle the fault,
+        * make sure we exit gracefully rather than endlessly redo
+        * the fault.
+        */
+       fault = handle_mm_fault(mm, vma, address, flags);
+
+       if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+               return 0;
+
+       if (unlikely(fault & VM_FAULT_ERROR)) {
+               if (fault & VM_FAULT_OOM)
+                       goto out_of_memory;
+               else if (fault & VM_FAULT_SIGBUS)
+                       goto do_sigbus;
+               BUG();
+       }
+       if (flags & FAULT_FLAG_ALLOW_RETRY) {
+               if (fault & VM_FAULT_MAJOR)
+                       tsk->maj_flt++;
+               else
+                       tsk->min_flt++;
+               if (fault & VM_FAULT_RETRY) {
+                       flags &= ~FAULT_FLAG_ALLOW_RETRY;
+                       flags |= FAULT_FLAG_TRIED;
+
+                       /*
+                        * No need to up_read(&mm->mmap_sem) as we would
+                        * have already released it in __lock_page_or_retry
+                        * in mm/filemap.c.
+                        */
+
+                       goto retry;
+               }
+       }
+
+       up_read(&mm->mmap_sem);
+       return 0;
+
+check_expansion:
+       vma = prev_vma;
+       if (vma && (expand_stack(vma, address) == 0))
+               goto good_area;
+
+bad_area:
+       up_read(&mm->mmap_sem);
+
+bad_area_nosemaphore:
+       if (user_mode(regs)) {
+               info.si_signo = SIGSEGV;
+               info.si_errno = 0;
+               info.si_code = SEGV_MAPERR;
+               info.si_addr = (__force void __user *)address;
+               info.si_trapno = trapno;
+
+               if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
+                   printk_ratelimit()) {
+                       pr_info("%s%s[%d]: segfault at %lx pc %08x sp %08x write %d trap %#x (%s)",
+                              task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
+                              tsk->comm, task_pid_nr(tsk), address,
+                              regs->ctx.CurrPC, regs->ctx.AX[0].U0,
+                              write_access, trapno, trap_name(trapno));
+                       print_vma_addr(" in ", regs->ctx.CurrPC);
+                       print_vma_addr(" rtp in ", regs->ctx.DX[4].U1);
+                       printk("\n");
+                       show_regs(regs);
+               }
+               force_sig_info(SIGSEGV, &info, tsk);
+               return 1;
+       }
+       goto no_context;
+
+do_sigbus:
+       up_read(&mm->mmap_sem);
+
+       /*
+        * Send a sigbus, regardless of whether we were in kernel
+        * or user mode.
+        */
+       info.si_signo = SIGBUS;
+       info.si_errno = 0;
+       info.si_code = BUS_ADRERR;
+       info.si_addr = (__force void __user *)address;
+       info.si_trapno = trapno;
+       force_sig_info(SIGBUS, &info, tsk);
+
+       /* Kernel mode? Handle exceptions or die */
+       if (!user_mode(regs))
+               goto no_context;
+
+       return 1;
+
+       /*
+        * We ran out of memory, or some other thing happened to us that made
+        * us unable to handle the page fault gracefully.
+        */
+out_of_memory:
+       up_read(&mm->mmap_sem);
+       if (user_mode(regs))
+               do_group_exit(SIGKILL);
+
+no_context:
+       /* Are we prepared to handle this kernel fault?  */
+       if (fixup_exception(regs)) {
+               clear_cbuf_entry(regs, address, trapno);
+               return 1;
+       }
+
+       die("Oops", regs, (write_access << 15) | trapno, address);
+       do_exit(SIGKILL);
+}
diff --git a/arch/metag/mm/highmem.c b/arch/metag/mm/highmem.c
new file mode 100644 (file)
index 0000000..d71f621
--- /dev/null
@@ -0,0 +1,133 @@
+#include <linux/export.h>
+#include <linux/highmem.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+
+static pte_t *kmap_pte;
+
+unsigned long highstart_pfn, highend_pfn;
+
+void *kmap(struct page *page)
+{
+       might_sleep();
+       if (!PageHighMem(page))
+               return page_address(page);
+       return kmap_high(page);
+}
+EXPORT_SYMBOL(kmap);
+
+void kunmap(struct page *page)
+{
+       BUG_ON(in_interrupt());
+       if (!PageHighMem(page))
+               return;
+       kunmap_high(page);
+}
+EXPORT_SYMBOL(kunmap);
+
+/*
+ * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
+ * no global lock is needed and because the kmap code must perform a global TLB
+ * invalidation when the kmap pool wraps.
+ *
+ * However, while holding an atomic kmap it is not legal to sleep, so atomic
+ * kmaps are appropriate for short, tight code paths only.
+ */
+
+void *kmap_atomic(struct page *page)
+{
+       enum fixed_addresses idx;
+       unsigned long vaddr;
+       int type;
+
+       /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
+       pagefault_disable();
+       if (!PageHighMem(page))
+               return page_address(page);
+
+       type = kmap_atomic_idx_push();
+       idx = type + KM_TYPE_NR * smp_processor_id();
+       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+       BUG_ON(!pte_none(*(kmap_pte - idx)));
+#endif
+       set_pte(kmap_pte - idx, mk_pte(page, PAGE_KERNEL));
+
+       return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+void __kunmap_atomic(void *kvaddr)
+{
+       unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
+       int idx, type;
+
+       if (kvaddr >= (void *)FIXADDR_START) {
+               type = kmap_atomic_idx();
+               idx = type + KM_TYPE_NR * smp_processor_id();
+
+               /*
+                * Force other mappings to Oops if they try to access this
+                * pte without first remapping it.  Keeping stale mappings
+                * around is also a bad idea, in case the page changes
+                * cacheability attributes or becomes a protected page in a
+                * hypervisor.
+                */
+               pte_clear(&init_mm, vaddr, kmap_pte-idx);
+               flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+
+               kmap_atomic_idx_pop();
+       }
+
+       pagefault_enable();
+}
+EXPORT_SYMBOL(__kunmap_atomic);
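
As the comment above notes, atomic kmaps must be short-lived and may not sleep; a hedged usage sketch (the helper name is illustrative):

/* Hedged usage sketch: map, copy, unmap, with no sleeping in between. */
static void copy_from_highpage_sketch(void *dst, struct page *page, size_t len)
{
        void *vaddr = kmap_atomic(page);

        memcpy(dst, vaddr, len);
        kunmap_atomic(vaddr);
}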
+
+/*
+ * This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ */
+void *kmap_atomic_pfn(unsigned long pfn)
+{
+       enum fixed_addresses idx;
+       unsigned long vaddr;
+       int type;
+
+       pagefault_disable();
+
+       type = kmap_atomic_idx_push();
+       idx = type + KM_TYPE_NR * smp_processor_id();
+       vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+#ifdef CONFIG_DEBUG_HIGHMEM
+       BUG_ON(!pte_none(*(kmap_pte - idx)));
+#endif
+       set_pte(kmap_pte - idx, pfn_pte(pfn, PAGE_KERNEL));
+       flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+
+       return (void *)vaddr;
+}
+
+struct page *kmap_atomic_to_page(void *ptr)
+{
+       unsigned long vaddr = (unsigned long)ptr;
+       int idx;
+       pte_t *pte;
+
+       if (vaddr < FIXADDR_START)
+               return virt_to_page(ptr);
+
+       idx = virt_to_fix(vaddr);
+       pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
+       return pte_page(*pte);
+}
+
+void __init kmap_init(void)
+{
+       unsigned long kmap_vstart;
+
+       /* cache the first kmap pte */
+       kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+       kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+}
diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c
new file mode 100644 (file)
index 0000000..3c52fa6
--- /dev/null
@@ -0,0 +1,259 @@
+/*
+ * arch/metag/mm/hugetlbpage.c
+ *
+ * METAG HugeTLB page support.
+ *
+ * Cloned from SuperH
+ *
+ * Cloned from sparc64 by Paul Mundt.
+ *
+ * Copyright (C) 2002, 2003 David S. Miller (davem@redhat.com)
+ */
+
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/pagemap.h>
+#include <linux/sysctl.h>
+
+#include <asm/mman.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+
+/*
+ * If the arch doesn't supply something else, assume that hugepage
+ * size aligned regions are ok without further preparation.
+ */
+int prepare_hugepage_range(struct file *file, unsigned long addr,
+                                               unsigned long len)
+{
+       struct mm_struct *mm = current->mm;
+       struct hstate *h = hstate_file(file);
+       struct vm_area_struct *vma;
+
+       if (len & ~huge_page_mask(h))
+               return -EINVAL;
+       if (addr & ~huge_page_mask(h))
+               return -EINVAL;
+       if (TASK_SIZE - len < addr)
+               return -EINVAL;
+
+       vma = find_vma(mm, ALIGN_HUGEPT(addr));
+       if (vma && !(vma->vm_flags & MAP_HUGETLB))
+               return -EINVAL;
+
+       vma = find_vma(mm, addr);
+       if (vma) {
+               if (addr + len > vma->vm_start)
+                       return -EINVAL;
+               if (!(vma->vm_flags & MAP_HUGETLB) &&
+                   (ALIGN_HUGEPT(addr + len) > vma->vm_start))
+                       return -EINVAL;
+       }
+       return 0;
+}
+
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+                       unsigned long addr, unsigned long sz)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       pgd = pgd_offset(mm, addr);
+       pud = pud_offset(pgd, addr);
+       pmd = pmd_offset(pud, addr);
+       pte = pte_alloc_map(mm, NULL, pmd, addr);
+       pgd->pgd &= ~_PAGE_SZ_MASK;
+       pgd->pgd |= _PAGE_SZHUGE;
+
+       return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
+{
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte = NULL;
+
+       pgd = pgd_offset(mm, addr);
+       pud = pud_offset(pgd, addr);
+       pmd = pmd_offset(pud, addr);
+       pte = pte_offset_kernel(pmd, addr);
+
+       return pte;
+}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+       return 0;
+}
+
+struct page *follow_huge_addr(struct mm_struct *mm,
+                             unsigned long address, int write)
+{
+       return ERR_PTR(-EINVAL);
+}
+
+int pmd_huge(pmd_t pmd)
+{
+       return pmd_page_shift(pmd) > PAGE_SHIFT;
+}
+
+int pud_huge(pud_t pud)
+{
+       return 0;
+}
+
+struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
+                            pmd_t *pmd, int write)
+{
+       return NULL;
+}
+
+#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+
+/*
+ * Look for an unmapped area starting after another hugetlb vma.
+ * There are guaranteed to be no huge pte's spare if all the huge pages are
+ * full size (4MB), so in that case compile out this search.
+ */
+#if HPAGE_SHIFT == HUGEPT_SHIFT
+static inline unsigned long
+hugetlb_get_unmapped_area_existing(unsigned long len)
+{
+       return 0;
+}
+#else
+static unsigned long
+hugetlb_get_unmapped_area_existing(unsigned long len)
+{
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long start_addr, addr;
+       int after_huge;
+
+       if (mm->context.part_huge) {
+               start_addr = mm->context.part_huge;
+               after_huge = 1;
+       } else {
+               start_addr = TASK_UNMAPPED_BASE;
+               after_huge = 0;
+       }
+new_search:
+       addr = start_addr;
+
+       for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+               if ((!vma && !after_huge) || TASK_SIZE - len < addr) {
+                       /*
+                        * Start a new search - just in case we missed
+                        * some holes.
+                        */
+                       if (start_addr != TASK_UNMAPPED_BASE) {
+                               start_addr = TASK_UNMAPPED_BASE;
+                               goto new_search;
+                       }
+                       return 0;
+               }
+               /* skip ahead if we've aligned right over some vmas */
+               if (vma && vma->vm_end <= addr)
+                       continue;
+               /* space before the next vma? */
+               if (after_huge && (!vma || ALIGN_HUGEPT(addr + len)
+                           <= vma->vm_start)) {
+                       unsigned long end = addr + len;
+                       if (end & HUGEPT_MASK)
+                               mm->context.part_huge = end;
+                       else if (addr == mm->context.part_huge)
+                               mm->context.part_huge = 0;
+                       return addr;
+               }
+               if (vma && (vma->vm_flags & MAP_HUGETLB)) {
+                       /* space after a huge vma in 2nd level page table? */
+                       if (vma->vm_end & HUGEPT_MASK) {
+                               after_huge = 1;
+                               /* no need to align to the next PT block */
+                               addr = vma->vm_end;
+                               continue;
+                       }
+               }
+               after_huge = 0;
+               addr = ALIGN_HUGEPT(vma->vm_end);
+       }
+}
+#endif
+
+/* Do a full search to find an area without any nearby normal pages. */
+static unsigned long
+hugetlb_get_unmapped_area_new_pmd(unsigned long len)
+{
+       struct vm_unmapped_area_info info;
+
+       info.flags = 0;
+       info.length = len;
+       info.low_limit = TASK_UNMAPPED_BASE;
+       info.high_limit = TASK_SIZE;
+       info.align_mask = PAGE_MASK & HUGEPT_MASK;
+       info.align_offset = 0;
+       return vm_unmapped_area(&info);
+}
+
+unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+               unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+       struct hstate *h = hstate_file(file);
+
+       if (len & ~huge_page_mask(h))
+               return -EINVAL;
+       if (len > TASK_SIZE)
+               return -ENOMEM;
+
+       if (flags & MAP_FIXED) {
+               if (prepare_hugepage_range(file, addr, len))
+                       return -EINVAL;
+               return addr;
+       }
+
+       if (addr) {
+               addr = ALIGN(addr, huge_page_size(h));
+               if (!prepare_hugepage_range(file, addr, len))
+                       return addr;
+       }
+
+       /*
+        * Look for an existing hugetlb vma with space after it (this is to
+        * minimise fragmentation caused by huge pages).
+        */
+       addr = hugetlb_get_unmapped_area_existing(len);
+       if (addr)
+               return addr;
+
+       /*
+        * Find an unmapped naturally aligned set of 4MB blocks that we can use
+        * for huge pages.
+        */
+       return hugetlb_get_unmapped_area_new_pmd(len);
+}
+
+#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
+
+/* necessary for boot time 4MB huge page allocation */
+static __init int setup_hugepagesz(char *opt)
+{
+       unsigned long ps = memparse(opt, &opt);
+       if (ps == (1 << HPAGE_SHIFT)) {
+               hugetlb_add_hstate(HPAGE_SHIFT - PAGE_SHIFT);
+       } else {
+               pr_err("hugepagesz: Unsupported page size %lu M\n",
+                      ps >> 20);
+               return 0;
+       }
+       return 1;
+}
+__setup("hugepagesz=", setup_hugepagesz);
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
new file mode 100644 (file)
index 0000000..504a398
--- /dev/null
@@ -0,0 +1,451 @@
+/*
+ *  Copyright (C) 2005,2006,2007,2008,2009,2010 Imagination Technologies
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/pagemap.h>
+#include <linux/percpu.h>
+#include <linux/memblock.h>
+#include <linux/initrd.h>
+#include <linux/of_fdt.h>
+
+#include <asm/setup.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/sections.h>
+#include <asm/tlb.h>
+#include <asm/user_gateway.h>
+#include <asm/mmzone.h>
+#include <asm/fixmap.h>
+
+unsigned long pfn_base;
+EXPORT_SYMBOL(pfn_base);
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_data;
+
+unsigned long empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
+
+extern char __user_gateway_start;
+extern char __user_gateway_end;
+
+void *gateway_page;
+
+/*
+ * Insert the gateway page into a set of page tables, creating the
+ * page tables if necessary.
+ */
+static void insert_gateway_page(pgd_t *pgd, unsigned long address)
+{
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       BUG_ON(!pgd_present(*pgd));
+
+       pud = pud_offset(pgd, address);
+       BUG_ON(!pud_present(*pud));
+
+       pmd = pmd_offset(pud, address);
+       if (!pmd_present(*pmd)) {
+               pte = alloc_bootmem_pages(PAGE_SIZE);
+               set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte)));
+       }
+
+       pte = pte_offset_kernel(pmd, address);
+       set_pte(pte, pfn_pte(__pa(gateway_page) >> PAGE_SHIFT, PAGE_READONLY));
+}
+
+/* Alloc and map a page in a known location accessible to userspace. */
+static void __init user_gateway_init(void)
+{
+       unsigned long address = USER_GATEWAY_PAGE;
+       int offset = pgd_index(address);
+       pgd_t *pgd;
+
+       gateway_page = alloc_bootmem_pages(PAGE_SIZE);
+
+       pgd = swapper_pg_dir + offset;
+       insert_gateway_page(pgd, address);
+
+#ifdef CONFIG_METAG_META12
+       /*
+        * Insert the gateway page into our current page tables even
+        * though we've already inserted it into our reference page
+        * table (swapper_pg_dir). This is because with a META1 mmu we
+        * copy just the user address range and not the gateway page
+        * entry on context switch, see switch_mmu().
+        */
+       pgd = (pgd_t *)mmu_get_base() + offset;
+       insert_gateway_page(pgd, address);
+#endif /* CONFIG_METAG_META12 */
+
+       BUG_ON((&__user_gateway_end - &__user_gateway_start) > PAGE_SIZE);
+
+       gateway_page += (address & ~PAGE_MASK);
+
+       memcpy(gateway_page, &__user_gateway_start,
+              &__user_gateway_end - &__user_gateway_start);
+
+       /*
+        * We don't need to flush the TLB here, there should be no mapping
+        * present at boot for this address and only valid mappings are in
+        * the TLB (apart from on Meta 1.x, but those cached invalid
+        * mappings should be impossible to hit here).
+        *
+        * We don't flush the code cache here even though we have written
+        * code through the data cache and they may not be coherent. At
+        * this point we assume there is no stale data in the code cache
+        * for this address so there is no need to flush.
+        */
+}
+
+static void __init allocate_pgdat(unsigned int nid)
+{
+       unsigned long start_pfn, end_pfn;
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+       unsigned long phys;
+#endif
+
+       get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+       phys = __memblock_alloc_base(sizeof(struct pglist_data),
+                               SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT);
+       /* Retry with all of system memory */
+       if (!phys)
+               phys = __memblock_alloc_base(sizeof(struct pglist_data),
+                                            SMP_CACHE_BYTES,
+                                            memblock_end_of_DRAM());
+       if (!phys)
+               panic("Can't allocate pgdat for node %d\n", nid);
+
+       NODE_DATA(nid) = __va(phys);
+       memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+       NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+#endif
+
+       NODE_DATA(nid)->node_start_pfn = start_pfn;
+       NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+}
+
+static void __init bootmem_init_one_node(unsigned int nid)
+{
+       unsigned long total_pages, paddr;
+       unsigned long end_pfn;
+       struct pglist_data *p;
+
+       p = NODE_DATA(nid);
+
+       /* Nothing to do.. */
+       if (!p->node_spanned_pages)
+               return;
+
+       end_pfn = p->node_start_pfn + p->node_spanned_pages;
+#ifdef CONFIG_HIGHMEM
+       if (end_pfn > max_low_pfn)
+               end_pfn = max_low_pfn;
+#endif
+
+       total_pages = bootmem_bootmap_pages(end_pfn - p->node_start_pfn);
+
+       paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE);
+       if (!paddr)
+               panic("Can't allocate bootmap for nid[%d]\n", nid);
+
+       init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn);
+
+       free_bootmem_with_active_regions(nid, end_pfn);
+
+       /*
+        * XXX Handle initial reservations for the system memory node
+        * only for the moment, we'll refactor this later for handling
+        * reservations in other nodes.
+        */
+       if (nid == 0) {
+               struct memblock_region *reg;
+
+               /* Reserve the sections we're already using. */
+               for_each_memblock(reserved, reg) {
+                       unsigned long size = reg->size;
+
+#ifdef CONFIG_HIGHMEM
+                       /* ...but not highmem */
+                       if (PFN_DOWN(reg->base) >= highstart_pfn)
+                               continue;
+
+                       if (PFN_UP(reg->base + size) > highstart_pfn)
+                               size = (highstart_pfn - PFN_DOWN(reg->base))
+                                      << PAGE_SHIFT;
+#endif
+
+                       reserve_bootmem(reg->base, size, BOOTMEM_DEFAULT);
+               }
+       }
+
+       sparse_memory_present_with_active_regions(nid);
+}
+
+static void __init do_init_bootmem(void)
+{
+       struct memblock_region *reg;
+       int i;
+
+       /* Add active regions with valid PFNs. */
+       for_each_memblock(memory, reg) {
+               unsigned long start_pfn, end_pfn;
+               start_pfn = memblock_region_memory_base_pfn(reg);
+               end_pfn = memblock_region_memory_end_pfn(reg);
+               memblock_set_node(PFN_PHYS(start_pfn),
+                                 PFN_PHYS(end_pfn - start_pfn), 0);
+       }
+
+       /* All of system RAM sits in node 0 for the non-NUMA case */
+       allocate_pgdat(0);
+       node_set_online(0);
+
+       soc_mem_setup();
+
+       for_each_online_node(i)
+               bootmem_init_one_node(i);
+
+       sparse_init();
+}
+
+extern char _heap_start[];
+
+static void __init init_and_reserve_mem(void)
+{
+       unsigned long start_pfn, heap_start;
+       u64 base = min_low_pfn << PAGE_SHIFT;
+       u64 size = (max_low_pfn << PAGE_SHIFT) - base;
+
+       heap_start = (unsigned long) &_heap_start;
+
+       memblock_add(base, size);
+
+       /*
+        * Partially used pages are not usable - thus
+        * we are rounding upwards:
+        */
+       start_pfn = PFN_UP(__pa(heap_start));
+
+       /*
+        * Reserve the kernel text.
+        */
+       memblock_reserve(base, (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - base);
+
+#ifdef CONFIG_HIGHMEM
+       /*
+        * Add & reserve highmem, so page structures are initialised.
+        */
+       base = highstart_pfn << PAGE_SHIFT;
+       size = (highend_pfn << PAGE_SHIFT) - base;
+       if (size) {
+               memblock_add(base, size);
+               memblock_reserve(base, size);
+       }
+#endif
+}
+
+#ifdef CONFIG_HIGHMEM
+/*
+ * Ensure we have allocated page tables in swapper_pg_dir for the
+ * fixed mappings range from 'start' to 'end'.
+ */
+static void __init allocate_pgtables(unsigned long start, unsigned long end)
+{
+       pgd_t *pgd;
+       pmd_t *pmd;
+       pte_t *pte;
+       int i, j;
+       unsigned long vaddr;
+
+       vaddr = start;
+       i = pgd_index(vaddr);
+       j = pmd_index(vaddr);
+       pgd = swapper_pg_dir + i;
+
+       for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
+               pmd = (pmd_t *)pgd;
+               for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) {
+                       vaddr += PMD_SIZE;
+
+                       if (!pmd_none(*pmd))
+                               continue;
+
+                       pte = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+                       pmd_populate_kernel(&init_mm, pmd, pte);
+               }
+               j = 0;
+       }
+}
+
+static void __init fixedrange_init(void)
+{
+       unsigned long vaddr, end;
+       pgd_t *pgd;
+       pud_t *pud;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       /*
+        * Fixed mappings:
+        */
+       vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+       end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
+       allocate_pgtables(vaddr, end);
+
+       /*
+        * Permanent kmaps:
+        */
+       vaddr = PKMAP_BASE;
+       allocate_pgtables(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP);
+
+       pgd = swapper_pg_dir + pgd_index(vaddr);
+       pud = pud_offset(pgd, vaddr);
+       pmd = pmd_offset(pud, vaddr);
+       pte = pte_offset_kernel(pmd, vaddr);
+       pkmap_page_table = pte;
+}
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * paging_init() continues the virtual memory environment setup which
+ * was begun by the code in arch/metag/kernel/setup.c.
+ */
+void __init paging_init(unsigned long mem_end)
+{
+       unsigned long max_zone_pfns[MAX_NR_ZONES];
+       int nid;
+
+       init_and_reserve_mem();
+
+       memblock_allow_resize();
+
+       memblock_dump_all();
+
+       nodes_clear(node_online_map);
+
+       init_new_context(&init_task, &init_mm);
+
+       memset(swapper_pg_dir, 0, sizeof(swapper_pg_dir));
+
+       do_init_bootmem();
+       mmu_init(mem_end);
+
+#ifdef CONFIG_HIGHMEM
+       fixedrange_init();
+       kmap_init();
+#endif
+
+       /* Initialize the zero page to a bootmem page, already zeroed. */
+       empty_zero_page = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
+
+       user_gateway_init();
+
+       memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
+       for_each_online_node(nid) {
+               pg_data_t *pgdat = NODE_DATA(nid);
+               unsigned long low, start_pfn;
+
+               start_pfn = pgdat->bdata->node_min_pfn;
+               low = pgdat->bdata->node_low_pfn;
+
+               if (max_zone_pfns[ZONE_NORMAL] < low)
+                       max_zone_pfns[ZONE_NORMAL] = low;
+
+#ifdef CONFIG_HIGHMEM
+               max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
+#endif
+               pr_info("Node %u: start_pfn = 0x%lx, low = 0x%lx\n",
+                       nid, start_pfn, low);
+       }
+
+       free_area_init_nodes(max_zone_pfns);
+}
+
+void __init mem_init(void)
+{
+       int nid;
+
+#ifdef CONFIG_HIGHMEM
+       unsigned long tmp;
+       for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
+               struct page *page = pfn_to_page(tmp);
+               ClearPageReserved(page);
+               init_page_count(page);
+               __free_page(page);
+               totalhigh_pages++;
+       }
+       totalram_pages += totalhigh_pages;
+       num_physpages += totalhigh_pages;
+#endif /* CONFIG_HIGHMEM */
+
+       for_each_online_node(nid) {
+               pg_data_t *pgdat = NODE_DATA(nid);
+               unsigned long node_pages = 0;
+
+               num_physpages += pgdat->node_present_pages;
+
+               if (pgdat->node_spanned_pages)
+                       node_pages = free_all_bootmem_node(pgdat);
+
+               totalram_pages += node_pages;
+       }
+
+       pr_info("Memory: %luk/%luk available\n",
+               (unsigned long)nr_free_pages() << (PAGE_SHIFT - 10),
+               num_physpages << (PAGE_SHIFT - 10));
+
+       show_mem(0);
+
+       return;
+}
+
+static void free_init_pages(char *what, unsigned long begin, unsigned long end)
+{
+       unsigned long addr;
+
+       for (addr = begin; addr < end; addr += PAGE_SIZE) {
+               ClearPageReserved(virt_to_page(addr));
+               init_page_count(virt_to_page(addr));
+               memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
+               free_page(addr);
+               totalram_pages++;
+       }
+       pr_info("Freeing %s: %luk freed\n", what, (end - begin) >> 10);
+}
+
+void free_initmem(void)
+{
+       free_init_pages("unused kernel memory",
+                       (unsigned long)(&__init_begin),
+                       (unsigned long)(&__init_end));
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+       end = end & PAGE_MASK;
+       free_init_pages("initrd memory", start, end);
+}
+#endif
+
+#ifdef CONFIG_OF_FLATTREE
+void __init early_init_dt_setup_initrd_arch(unsigned long start,
+                                           unsigned long end)
+{
+       pr_err("%s(%lx, %lx)\n",
+              __func__, start, end);
+}
+#endif /* CONFIG_OF_FLATTREE */
diff --git a/arch/metag/mm/ioremap.c b/arch/metag/mm/ioremap.c
new file mode 100644 (file)
index 0000000..a136a43
--- /dev/null
@@ -0,0 +1,89 @@
+/*
+ * Re-map IO memory to kernel address space so that we can access it.
+ * Needed for memory-mapped I/O devices mapped outside our normal DRAM
+ * window (that is, all memory-mapped I/O devices).
+ *
+ * Copyright (C) 1995,1996 Linus Torvalds
+ *
+ * Meta port based on CRIS-port by Axis Communications AB
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/io.h>
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+
+/*
+ * Remap an arbitrary physical address space into the kernel virtual
+ * address space. Needed when the kernel wants to access high addresses
+ * directly.
+ *
+ * NOTE! We need to allow non-page-aligned mappings too: we will obviously
+ * have to convert them into an offset in a page-aligned mapping, but the
+ * caller shouldn't need to know that small detail.
+ */
+void __iomem *__ioremap(unsigned long phys_addr, size_t size,
+                       unsigned long flags)
+{
+       unsigned long addr;
+       struct vm_struct *area;
+       unsigned long offset, last_addr;
+       pgprot_t prot;
+
+       /* Don't allow wraparound or zero size */
+       last_addr = phys_addr + size - 1;
+       if (!size || last_addr < phys_addr)
+               return NULL;
+
+       /* Custom region addresses are accessible and uncached by default. */
+       if (phys_addr >= LINSYSCUSTOM_BASE &&
+           phys_addr < (LINSYSCUSTOM_BASE + LINSYSCUSTOM_LIMIT))
+               return (__force void __iomem *) phys_addr;
+
+       /*
+        * Mappings have to be page-aligned
+        */
+       offset = phys_addr & ~PAGE_MASK;
+       phys_addr &= PAGE_MASK;
+       size = PAGE_ALIGN(last_addr+1) - phys_addr;
+       prot = __pgprot(_PAGE_PRESENT | _PAGE_WRITE | _PAGE_DIRTY |
+                       _PAGE_ACCESSED | _PAGE_KERNEL | _PAGE_CACHE_WIN0 |
+                       flags);
+
+       /*
+        * Ok, go for it..
+        */
+       area = get_vm_area(size, VM_IOREMAP);
+       if (!area)
+               return NULL;
+       area->phys_addr = phys_addr;
+       addr = (unsigned long) area->addr;
+       if (ioremap_page_range(addr, addr + size, phys_addr, prot)) {
+               vunmap((void *) addr);
+               return NULL;
+       }
+       return (__force void __iomem *) (offset + (char *)addr);
+}
+EXPORT_SYMBOL(__ioremap);
+
+void __iounmap(void __iomem *addr)
+{
+       struct vm_struct *p;
+
+       if ((__force unsigned long)addr >= LINSYSCUSTOM_BASE &&
+           (__force unsigned long)addr < (LINSYSCUSTOM_BASE +
+                                          LINSYSCUSTOM_LIMIT))
+               return;
+
+       p = remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
+       if (unlikely(!p)) {
+               pr_err("iounmap: bad address %p\n", addr);
+               return;
+       }
+
+       kfree(p);
+}
+EXPORT_SYMBOL(__iounmap);
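
For context, here is a rough driver-style sketch of how the __ioremap()/__iounmap() pair above gets used. It is not taken from the patch: drivers would normally reach these through the generic ioremap()/iounmap() wrappers in asm/io.h, and the physical address, mapping size, and register layout below are invented placeholders.

#include <linux/io.h>
#include <linux/errno.h>
#include <linux/printk.h>

static int example_map_regs(unsigned long phys)
{
        void __iomem *regs = __ioremap(phys, 0x100, 0); /* uncached by default flags */
        u32 id;

        if (!regs)
                return -ENOMEM;

        id = readl(regs);               /* talk to the device through the mapping */
        pr_info("example: id register = 0x%08x\n", id);

        __iounmap(regs);                /* tear the mapping down again */
        return 0;
}
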
diff --git a/arch/metag/mm/l2cache.c b/arch/metag/mm/l2cache.c
new file mode 100644 (file)
index 0000000..c64ee61
--- /dev/null
@@ -0,0 +1,192 @@
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+
+#include <asm/l2cache.h>
+#include <asm/metag_isa.h>
+
+/* If non-0, then initialise the L2 cache */
+static int l2cache_init = 1;
+/* If non-0, then initialise the L2 cache prefetch */
+static int l2cache_init_pf = 1;
+
+int l2c_pfenable;
+
+static volatile u32 l2c_testdata[16] __initdata __aligned(64);
+
+static int __init parse_l2cache(char *p)
+{
+       char *cp = p;
+
+       if (get_option(&cp, &l2cache_init) != 1) {
+               pr_err("Bad l2cache parameter (%s)\n", p);
+               return 1;
+       }
+       return 0;
+}
+early_param("l2cache", parse_l2cache);
+
+static int __init parse_l2cache_pf(char *p)
+{
+       char *cp = p;
+
+       if (get_option(&cp, &l2cache_init_pf) != 1) {
+               pr_err("Bad l2cache_pf parameter (%s)\n", p);
+               return 1;
+       }
+       return 0;
+}
+early_param("l2cache_pf", parse_l2cache_pf);
+
+static int __init meta_l2c_setup(void)
+{
+       /*
+        * If the L2 cache isn't even present, don't do anything, but say so in
+        * the log.
+        */
+       if (!meta_l2c_is_present()) {
+               pr_info("L2 Cache: Not present\n");
+               return 0;
+       }
+
+       /*
+        * Check whether the line size is recognised.
+        */
+       if (!meta_l2c_linesize()) {
+               pr_warn_once("L2 Cache: unknown line size id (config=0x%08x)\n",
+                            meta_l2c_config());
+       }
+
+       /*
+        * Initialise state.
+        */
+       l2c_pfenable = _meta_l2c_pf_is_enabled();
+
+       /*
+        * Enable the L2 cache and print to log whether it was already enabled
+        * by the bootloader.
+        */
+       if (l2cache_init) {
+               pr_info("L2 Cache: Enabling... ");
+               if (meta_l2c_enable())
+                       pr_cont("already enabled\n");
+               else
+                       pr_cont("done\n");
+       } else {
+               pr_info("L2 Cache: Not enabling\n");
+       }
+
+       /*
+        * Enable L2 cache prefetch.
+        */
+       if (l2cache_init_pf) {
+               pr_info("L2 Cache: Enabling prefetch... ");
+               if (meta_l2c_pf_enable(1))
+                       pr_cont("already enabled\n");
+               else
+                       pr_cont("done\n");
+       } else {
+               pr_info("L2 Cache: Not enabling prefetch\n");
+       }
+
+       return 0;
+}
+core_initcall(meta_l2c_setup);
+
+int meta_l2c_disable(void)
+{
+       unsigned long flags;
+       int en;
+
+       if (!meta_l2c_is_present())
+               return 1;
+
+       /*
+        * Prevent other threads writing during the writeback, otherwise the
+        * writes will get "lost" when the L2 is disabled.
+        */
+       __global_lock2(flags);
+       en = meta_l2c_is_enabled();
+       if (likely(en)) {
+               _meta_l2c_pf_enable(0);
+               wr_fence();
+               _meta_l2c_purge();
+               _meta_l2c_enable(0);
+       }
+       __global_unlock2(flags);
+
+       return !en;
+}
+
+int meta_l2c_enable(void)
+{
+       unsigned long flags;
+       int en;
+
+       if (!meta_l2c_is_present())
+               return 0;
+
+       /*
+        * Init (clearing the L2) can happen while the L2 is disabled, so other
+        * threads are safe to continue executing. However, we must not init the
+        * cache if it's already enabled (dirty lines would be discarded), so
+        * this operation must still be atomic with respect to other threads.
+        */
+       __global_lock1(flags);
+       en = meta_l2c_is_enabled();
+       if (likely(!en)) {
+               _meta_l2c_init();
+               _meta_l2c_enable(1);
+               _meta_l2c_pf_enable(l2c_pfenable);
+       }
+       __global_unlock1(flags);
+
+       return en;
+}
+
+int meta_l2c_pf_enable(int pfenable)
+{
+       unsigned long flags;
+       int en = l2c_pfenable;
+
+       if (!meta_l2c_is_present())
+               return 0;
+
+       /*
+        * We read modify write the enable register, so this operation must be
+        * atomic with other threads.
+        */
+       __global_lock1(flags);
+       en = l2c_pfenable;
+       l2c_pfenable = pfenable;
+       if (meta_l2c_is_enabled())
+               _meta_l2c_pf_enable(pfenable);
+       __global_unlock1(flags);
+
+       return en;
+}
+
+int meta_l2c_flush(void)
+{
+       unsigned long flags;
+       int en;
+
+       /*
+        * Prevent other threads writing during the writeback. This also
+        * involves read modify writes.
+        */
+       __global_lock2(flags);
+       en = meta_l2c_is_enabled();
+       if (likely(en)) {
+               _meta_l2c_pf_enable(0);
+               wr_fence();
+               _meta_l2c_purge();
+               _meta_l2c_enable(0);
+               _meta_l2c_init();
+               _meta_l2c_enable(1);
+               _meta_l2c_pf_enable(l2c_pfenable);
+       }
+       __global_unlock2(flags);
+
+       return !en;
+}
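
Because meta_l2c_pf_enable() above returns the previous prefetch setting, a caller that needs prefetch off temporarily can restore whatever was configured before. A hedged sketch of that pattern follows; the surrounding function is hypothetical and not part of the patch.

#include <asm/l2cache.h>

static void example_without_l2_prefetch(void)
{
        int old_pf = meta_l2c_pf_enable(0);     /* prefetch off, remember old state */

        /* ... work that must not race with L2 prefetching ... */

        meta_l2c_pf_enable(old_pf);             /* restore the previous setting */
}
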
diff --git a/arch/metag/mm/maccess.c b/arch/metag/mm/maccess.c
new file mode 100644 (file)
index 0000000..eba2cfc
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * safe read and write memory routines callable while atomic
+ *
+ * Copyright 2012 Imagination Technologies
+ */
+
+#include <linux/uaccess.h>
+#include <asm/io.h>
+
+/*
+ * The generic probe_kernel_write() uses the user copy code which can split the
+ * writes if the source is unaligned, and repeats writes to make exceptions
+ * precise. We override it here to avoid these things happening to
+ * memory-mapped I/O regions, where they could have undesired effects.
+ * Due to the use of the CACHERD instruction this only works on Meta2 onwards.
+ */
+#ifdef CONFIG_METAG_META21
+long probe_kernel_write(void *dst, const void *src, size_t size)
+{
+       unsigned long ldst = (unsigned long)dst;
+       void __iomem *iodst = (void __iomem *)dst;
+       unsigned long lsrc = (unsigned long)src;
+       const u8 *psrc = (u8 *)src;
+       unsigned int pte, i;
+       u8 bounce[8] __aligned(8);
+
+       if (!size)
+               return 0;
+
+       /* Use the write combine bit to decide if the destination is MMIO. */
+       pte = __builtin_meta2_cacherd(dst);
+
+       /* Check the mapping is valid and writeable. */
+       if ((pte & (MMCU_ENTRY_WR_BIT | MMCU_ENTRY_VAL_BIT))
+           != (MMCU_ENTRY_WR_BIT | MMCU_ENTRY_VAL_BIT))
+               return -EFAULT;
+
+       /* Fall back to generic version for cases we're not interested in. */
+       if (pte & MMCU_ENTRY_WRC_BIT    || /* write combined memory */
+           (ldst & (size - 1))         || /* destination unaligned */
+           size > 8                    || /* more than max write size */
+           (size & (size - 1)))           /* non power of 2 size */
+               return __probe_kernel_write(dst, src, size);
+
+       /* If src is unaligned, copy to the aligned bounce buffer first. */
+       if (lsrc & (size - 1)) {
+               for (i = 0; i < size; ++i)
+                       bounce[i] = psrc[i];
+               psrc = bounce;
+       }
+
+       switch (size) {
+       case 1:
+               writeb(*psrc, iodst);
+               break;
+       case 2:
+               writew(*(const u16 *)psrc, iodst);
+               break;
+       case 4:
+               writel(*(const u32 *)psrc, iodst);
+               break;
+       case 8:
+               writeq(*(const u64 *)psrc, iodst);
+               break;
+       }
+       return 0;
+}
+#endif
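
A short sketch of the intended use of the override above: poking a naturally aligned 32-bit MMIO register from a context that must not fault. The function name, register pointer, and value are illustrative placeholders, not taken from the patch.

#include <linux/uaccess.h>
#include <linux/io.h>

static long example_poke_mmio(void __iomem *reg)
{
        u32 val = 0x1;

        /* Aligned, power-of-2 size <= 8 bytes, so the single writel() path
         * above is taken instead of the generic byte-splitting copy. */
        return probe_kernel_write((void __force *)reg, &val, sizeof(val));
}
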
diff --git a/arch/metag/mm/mmu-meta1.c b/arch/metag/mm/mmu-meta1.c
new file mode 100644 (file)
index 0000000..91f4255
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ *  Copyright (C) 2005,2006,2007,2008,2009 Imagination Technologies
+ *
+ * Meta 1 MMU handling code.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/io.h>
+
+#include <asm/mmu.h>
+
+#define DM3_BASE (LINSYSDIRECT_BASE + (MMCU_DIRECTMAPn_ADDR_SCALE * 3))
+
+/*
+ * This contains the physical address of the top level 2k pgd table.
+ */
+static unsigned long mmu_base_phys;
+
+/*
+ * Given a physical address, return a mapped virtual address that can be used
+ * to access that location.
+ * In practice, we use the DirectMap region to make this happen.
+ */
+static unsigned long map_addr(unsigned long phys)
+{
+       static unsigned long dm_base = 0xFFFFFFFF;
+       int offset;
+
+       offset = phys - dm_base;
+
+       /* Are we in the current map range? */
+       if ((offset < 0) || (offset >= MMCU_DIRECTMAPn_ADDR_SCALE)) {
+               /* Calculate new DM area */
+               dm_base = phys & ~(MMCU_DIRECTMAPn_ADDR_SCALE - 1);
+
+               /* Actually map it in! */
+               metag_out32(dm_base, MMCU_DIRECTMAP3_ADDR);
+
+               /* And calculate how far into that area our reference is */
+               offset = phys - dm_base;
+       }
+
+       return DM3_BASE + offset;
+}
+
+/*
+ * Return the physical address of the base of our pgd table.
+ */
+static inline unsigned long __get_mmu_base(void)
+{
+       unsigned long base_phys;
+       unsigned int stride;
+
+       if (is_global_space(PAGE_OFFSET))
+               stride = 4;
+       else
+               stride = hard_processor_id();   /* [0..3] */
+
+       base_phys = metag_in32(MMCU_TABLE_PHYS_ADDR);
+       base_phys += (0x800 * stride);
+
+       return base_phys;
+}
+
+/* Given a virtual address, return the virtual address of the relevant pgd */
+static unsigned long pgd_entry_addr(unsigned long virt)
+{
+       unsigned long pgd_phys;
+       unsigned long pgd_virt;
+
+       if (!mmu_base_phys)
+               mmu_base_phys = __get_mmu_base();
+
+       /*
+        * Are we trying to map a global address.  If so, then index
+        * the global pgd table instead of our local one.
+        */
+       if (is_global_space(virt)) {
+               /* Scale into 2gig map */
+               virt &= ~0x80000000;
+       }
+
+       /* Base of the pgd table plus our 4Meg entry, 4bytes each */
+       pgd_phys = mmu_base_phys + ((virt >> PGDIR_SHIFT) * 4);
+
+       pgd_virt = map_addr(pgd_phys);
+
+       return pgd_virt;
+}
+
+/* Given a virtual address, return the virtual address of the relevant pte */
+static unsigned long pgtable_entry_addr(unsigned long virt)
+{
+       unsigned long pgtable_phys;
+       unsigned long pgtable_virt, pte_virt;
+
+       /* Find the physical address of the 4MB page table */
+       pgtable_phys = metag_in32(pgd_entry_addr(virt)) & MMCU_ENTRY_ADDR_BITS;
+
+       /* Map it to a virtual address */
+       pgtable_virt = map_addr(pgtable_phys);
+
+       /* And index into it for our pte */
+       pte_virt = pgtable_virt + ((virt >> PAGE_SHIFT) & 0x3FF) * 4;
+
+       return pte_virt;
+}
+
+unsigned long mmu_read_first_level_page(unsigned long vaddr)
+{
+       return metag_in32(pgd_entry_addr(vaddr));
+}
+
+unsigned long mmu_read_second_level_page(unsigned long vaddr)
+{
+       return metag_in32(pgtable_entry_addr(vaddr));
+}
+
+unsigned long mmu_get_base(void)
+{
+       static unsigned long __base;
+
+       /* Find the base of our MMU pgd table */
+       if (!__base)
+               __base = pgd_entry_addr(0);
+
+       return __base;
+}
+
+void __init mmu_init(unsigned long mem_end)
+{
+       unsigned long entry, addr;
+       pgd_t *p_swapper_pg_dir;
+
+       /*
+        * Now copy over any MMU pgd entries already in the mmu page tables
+        * over to our root init process (swapper_pg_dir) map.  This map is
+        * then inherited by all other processes, which means all processes
+        * inherit a map of the kernel space.
+        */
+       addr = PAGE_OFFSET;
+       entry = pgd_index(PAGE_OFFSET);
+       p_swapper_pg_dir = pgd_offset_k(0) + entry;
+
+       while (addr <= META_MEMORY_LIMIT) {
+               unsigned long pgd_entry;
+               /* copy over the current MMU value */
+               pgd_entry = mmu_read_first_level_page(addr);
+               pgd_val(*p_swapper_pg_dir) = pgd_entry;
+
+               p_swapper_pg_dir++;
+               addr += PGDIR_SIZE;
+               entry++;
+       }
+}
diff --git a/arch/metag/mm/mmu-meta2.c b/arch/metag/mm/mmu-meta2.c
new file mode 100644 (file)
index 0000000..81dcbb0
--- /dev/null
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2008,2009,2010,2011 Imagination Technologies Ltd.
+ *
+ * Meta 2 enhanced mode MMU handling code.
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/bootmem.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+
+unsigned long mmu_read_first_level_page(unsigned long vaddr)
+{
+       unsigned int cpu = hard_processor_id();
+       unsigned long offset, linear_base, linear_limit;
+       unsigned int phys0;
+       pgd_t *pgd, entry;
+
+       if (is_global_space(vaddr))
+               vaddr &= ~0x80000000;
+
+       offset = vaddr >> PGDIR_SHIFT;
+
+       phys0 = metag_in32(mmu_phys0_addr(cpu));
+
+       /* Top bit of linear base is always zero. */
+       linear_base = (phys0 >> PGDIR_SHIFT) & 0x1ff;
+
+       /* Limit in the range 0 (4MB) to 9 (2GB). */
+       linear_limit = 1 << ((phys0 >> 8) & 0xf);
+       linear_limit += linear_base;
+
+       /*
+        * If offset is below linear base or above the limit then no
+        * mapping exists.
+        */
+       if (offset < linear_base || offset > linear_limit)
+               return 0;
+
+       offset -= linear_base;
+       pgd = (pgd_t *)mmu_get_base();
+       entry = pgd[offset];
+
+       return pgd_val(entry);
+}
+
+unsigned long mmu_read_second_level_page(unsigned long vaddr)
+{
+       return __builtin_meta2_cacherd((void *)(vaddr & PAGE_MASK));
+}
+
+unsigned long mmu_get_base(void)
+{
+       unsigned int cpu = hard_processor_id();
+       unsigned long stride;
+
+       stride = cpu * LINSYSMEMTnX_STRIDE;
+
+       /*
+        * Bits 18:2 of the MMCU_TnLocal_TABLE_PHYS1 register should be
+        * used as an offset to the start of the top-level pgd table.
+        */
+       stride += (metag_in32(mmu_phys1_addr(cpu)) & 0x7fffc);
+
+       if (is_global_space(PAGE_OFFSET))
+               stride += LINSYSMEMTXG_OFFSET;
+
+       return LINSYSMEMT0L_BASE + stride;
+}
+
+#define FIRST_LEVEL_MASK       0xffffffc0
+#define SECOND_LEVEL_MASK      0xfffff000
+#define SECOND_LEVEL_ALIGN     64
+
+static void repriv_mmu_tables(void)
+{
+       unsigned long phys0_addr;
+       unsigned int g;
+
+       /*
+        * Check that all the mmu table regions are priv protected, and if not
+        * fix them and emit a warning. If we left them without priv protection
+        * then userland processes would have access to a 2M window into
+        * physical memory near where the page tables are.
+        */
+       phys0_addr = MMCU_T0LOCAL_TABLE_PHYS0;
+       for (g = 0; g < 2; ++g) {
+               unsigned int t, phys0;
+               unsigned long flags;
+               for (t = 0; t < 4; ++t) {
+                       __global_lock2(flags);
+                       phys0 = metag_in32(phys0_addr);
+                       if ((phys0 & _PAGE_PRESENT) && !(phys0 & _PAGE_PRIV)) {
+                               pr_warn("Fixing priv protection on T%d %s MMU table region\n",
+                                       t,
+                                       g ? "global" : "local");
+                               phys0 |= _PAGE_PRIV;
+                               metag_out32(phys0, phys0_addr);
+                       }
+                       __global_unlock2(flags);
+
+                       phys0_addr += MMCU_TnX_TABLE_PHYSX_STRIDE;
+               }
+
+               phys0_addr += MMCU_TXG_TABLE_PHYSX_OFFSET
+                           - 4*MMCU_TnX_TABLE_PHYSX_STRIDE;
+       }
+}
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+static void mmu_resume(void)
+{
+       /*
+        * If a full suspend to RAM has happened then the original bad MMU table
+        * priv may have been restored, so repriv them again.
+        */
+       repriv_mmu_tables();
+}
+#else
+#define mmu_resume NULL
+#endif /* CONFIG_METAG_SUSPEND_MEM */
+
+static struct syscore_ops mmu_syscore_ops = {
+       .resume  = mmu_resume,
+};
+
+void __init mmu_init(unsigned long mem_end)
+{
+       unsigned long entry, addr;
+       pgd_t *p_swapper_pg_dir;
+#ifdef CONFIG_KERNEL_4M_PAGES
+       unsigned long mem_size = mem_end - PAGE_OFFSET;
+       unsigned int pages = DIV_ROUND_UP(mem_size, 1 << 22);
+       unsigned int second_level_entry = 0;
+       unsigned long *second_level_table;
+#endif
+
+       /*
+        * Now copy over any MMU pgd entries already in the mmu page tables
+        * over to our root init process (swapper_pg_dir) map.  This map is
+        * then inherited by all other processes, which means all processes
+        * inherit a map of the kernel space.
+        */
+       addr = META_MEMORY_BASE;
+       entry = pgd_index(META_MEMORY_BASE);
+       p_swapper_pg_dir = pgd_offset_k(0) + entry;
+
+       while (entry < (PTRS_PER_PGD - pgd_index(META_MEMORY_BASE))) {
+               unsigned long pgd_entry;
+               /* copy over the current MMU value */
+               pgd_entry = mmu_read_first_level_page(addr);
+               pgd_val(*p_swapper_pg_dir) = pgd_entry;
+
+               p_swapper_pg_dir++;
+               addr += PGDIR_SIZE;
+               entry++;
+       }
+
+#ifdef CONFIG_KERNEL_4M_PAGES
+       /*
+        * At this point we can also map the kernel with 4MB pages to
+        * reduce TLB pressure.
+        */
+       second_level_table = alloc_bootmem_pages(SECOND_LEVEL_ALIGN * pages);
+
+       addr = PAGE_OFFSET;
+       entry = pgd_index(PAGE_OFFSET);
+       p_swapper_pg_dir = pgd_offset_k(0) + entry;
+
+       while (pages > 0) {
+               unsigned long phys_addr, second_level_phys;
+               pte_t *pte = (pte_t *)&second_level_table[second_level_entry];
+
+               phys_addr = __pa(addr);
+
+               second_level_phys = __pa(pte);
+
+               pgd_val(*p_swapper_pg_dir) = ((second_level_phys &
+                                              FIRST_LEVEL_MASK) |
+                                             _PAGE_SZ_4M |
+                                             _PAGE_PRESENT);
+
+               pte_val(*pte) = ((phys_addr & SECOND_LEVEL_MASK) |
+                                _PAGE_PRESENT | _PAGE_DIRTY |
+                                _PAGE_ACCESSED | _PAGE_WRITE |
+                                _PAGE_CACHEABLE | _PAGE_KERNEL);
+
+               p_swapper_pg_dir++;
+               addr += PGDIR_SIZE;
+               /* Second level pages must be 64byte aligned. */
+               second_level_entry += (SECOND_LEVEL_ALIGN /
+                                      sizeof(unsigned long));
+               pages--;
+       }
+       load_pgd(swapper_pg_dir, hard_processor_id());
+       flush_tlb_all();
+#endif
+
+       repriv_mmu_tables();
+       register_syscore_ops(&mmu_syscore_ops);
+}
diff --git a/arch/metag/mm/numa.c b/arch/metag/mm/numa.c
new file mode 100644 (file)
index 0000000..9ae578c
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ *  Multiple memory node support for Meta machines
+ *
+ *  Copyright (C) 2007  Paul Mundt
+ *  Copyright (C) 2010  Imagination Technologies Ltd.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/export.h>
+#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/numa.h>
+#include <linux/pfn.h>
+#include <asm/sections.h>
+
+struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL_GPL(node_data);
+
+extern char _heap_start[];
+
+/*
+ * On Meta machines the conventional approach is to stash system RAM
+ * in node 0, and other memory blocks into node 1 and up, ordered by
+ * latency. Each node's pgdat is node-local at the beginning of the node,
+ * immediately followed by the node mem map.
+ */
+void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end)
+{
+       unsigned long bootmap_pages, bootmem_paddr;
+       unsigned long start_pfn, end_pfn;
+       unsigned long pgdat_paddr;
+
+       /* Don't allow bogus node assignment */
+       BUG_ON(nid > MAX_NUMNODES || nid <= 0);
+
+       start_pfn = start >> PAGE_SHIFT;
+       end_pfn = end >> PAGE_SHIFT;
+
+       memblock_add(start, end - start);
+
+       memblock_set_node(PFN_PHYS(start_pfn),
+                         PFN_PHYS(end_pfn - start_pfn), nid);
+
+       /* Node-local pgdat */
+       pgdat_paddr = memblock_alloc_base(sizeof(struct pglist_data),
+                                         SMP_CACHE_BYTES, end);
+       NODE_DATA(nid) = __va(pgdat_paddr);
+       memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
+
+       NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
+       NODE_DATA(nid)->node_start_pfn = start_pfn;
+       NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
+
+       /* Node-local bootmap */
+       bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
+       bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT,
+                                           PAGE_SIZE, end);
+       init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
+                         start_pfn, end_pfn);
+
+       free_bootmem_with_active_regions(nid, end_pfn);
+
+       /* Reserve the pgdat and bootmap space with the bootmem allocator */
+       reserve_bootmem_node(NODE_DATA(nid), pgdat_paddr & PAGE_MASK,
+                            sizeof(struct pglist_data), BOOTMEM_DEFAULT);
+       reserve_bootmem_node(NODE_DATA(nid), bootmem_paddr,
+                            bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT);
+
+       /* It's up */
+       node_set_online(nid);
+
+       /* Kick sparsemem */
+       sparse_memory_present_with_active_regions(nid);
+}
+
+void __init __weak soc_mem_setup(void)
+{
+}
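
soc_mem_setup() above is deliberately a weak no-op; a SoC with additional memory blocks would override it and register each block as its own node via setup_bootmem_node(). The following is a hedged sketch of such an override, with an invented address range and the header assumed to carry the declaration.

#include <linux/init.h>
#include <asm/mmzone.h>

/* Register 32MB of slower off-chip RAM as node 1 (addresses are invented). */
void __init soc_mem_setup(void)
{
        setup_bootmem_node(1, 0xb0000000UL, 0xb0000000UL + (32 << 20));
}
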
diff --git a/arch/metag/tbx/Makefile b/arch/metag/tbx/Makefile
new file mode 100644 (file)
index 0000000..e994239
--- /dev/null
@@ -0,0 +1,21 @@
+#
+# Makefile for TBX library files..
+#
+
+asflags-y              += -mmetac=2.1 -Wa,-mfpu=metac21 -mdsp
+asflags-$(CONFIG_SMP)  += -DTBX_PERCPU_SP_SAVE
+
+ccflags-y              += -mmetac=2.1
+
+lib-y                  += tbicore.o
+lib-y                  += tbictx.o
+lib-y                  += tbidefr.o
+lib-y                  += tbilogf.o
+lib-y                  += tbipcx.o
+lib-y                  += tbiroot.o
+lib-y                  += tbisoft.o
+lib-y                  += tbistring.o
+lib-y                  += tbitimer.o
+
+lib-$(CONFIG_METAG_DSP)        += tbidspram.o
+lib-$(CONFIG_METAG_FPU)        += tbictxfpu.o
diff --git a/arch/metag/tbx/tbicore.S b/arch/metag/tbx/tbicore.S
new file mode 100644 (file)
index 0000000..a0838eb
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * tbicore.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Core functions needed to support use of the thread binary interface for META
+ * processors
+ */
+
+       .file   "tbicore.S"
+/* Get data structures and defines from the TBI C header */
+#include <asm/metag_mem.h>
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+       .data
+       .balign 8
+       .global ___pTBISegs
+       .type   ___pTBISegs,object
+___pTBISegs:
+       .quad   0               /* Segment list pointer with its */
+       .size   ___pTBISegs,.-___pTBISegs
+                                       /* own id or spin-lock location */
+/*
+ * Return ___pTBISegs value specific to privilege level - not very complicated
+ * at the moment
+ *
+ * Register Usage: D0Re0 is the result, D1Re0 is used as a scratch
+ */
+       .text
+       .balign 4
+       .global ___TBISegList
+       .type   ___TBISegList,function
+___TBISegList:
+       MOVT    A1LbP,#HI(___pTBISegs)
+       ADD     A1LbP,A1LbP,#LO(___pTBISegs)
+       GETL    D0Re0,D1Re0,[A1LbP]
+       MOV     PC,D1RtP
+       .size   ___TBISegList,.-___TBISegList
+
+/*
+ * Search the segment list for a match given Id, pStart can be NULL
+ *
+ * Register Usage: D1Ar1 is pSeg, D0Ar2 is Id, D0Re0 is the result
+ *                 D0Ar4, D1Ar3 are used as a scratch
+ *                 NB: The PSTAT bit of Id in D0Ar2 may be toggled
+ */
+       .text
+       .balign 4
+       .global ___TBIFindSeg
+       .type   ___TBIFindSeg,function
+___TBIFindSeg:
+       MOVT    A1LbP,#HI(___pTBISegs)
+       ADD     A1LbP,A1LbP,#LO(___pTBISegs)
+       GETL    D1Ar3,D0Ar4,[A1LbP]     /* Read segment list head */
+       MOV     D0Re0,TXSTATUS          /* What priv level are we at? */
+       CMP     D1Ar1,#0                /* Is pStart provided? */
+/* Disable privilege adaption for now */
+       ANDT    D0Re0,D0Re0,#0  /*HI(TXSTATUS_PSTAT_BIT)  ; Is PSTAT set? Zero if not */
+       LSL     D0Re0,D0Re0,#(TBID_PSTAT_S-TXSTATUS_PSTAT_S)
+       XOR     D0Ar2,D0Ar2,D0Re0       /* Toggle Id PSTAT if privileged */
+       MOVNZ   D1Ar3,D1Ar1             /* Use pStart if provided */
+$LFindSegLoop:                 
+       ADDS    D0Re0,D1Ar3,#0          /* End of list? Load result into D0Re0 */
+       MOVZ    PC,D1RtP                /* If result is NULL we leave */
+       GETL    D1Ar3,D0Ar4,[D1Ar3]     /* Read pLink and Id */
+       CMP     D0Ar4,D0Ar2             /* Does it match? */
+       BNZ     $LFindSegLoop           /* Loop if there is no match */
+       TST     D0Re0,D0Re0             /* Clear zero flag - we found it! */
+       MOV     PC,D1RtP                /* Return */
+       .size   ___TBIFindSeg,.-___TBIFindSeg
+
+/* Useful offsets to encode the lower bits of the lock/unlock addresses */
+#define UON  (LINSYSEVENT_WR_ATOMIC_LOCK   & 0xFFF8)
+#define UOFF (LINSYSEVENT_WR_ATOMIC_UNLOCK & 0xFFF8)
+
+/*
+ * Perform a whole spin-lock sequence as used by the TBISignal routine
+ *
+ * Register Usage: D1Ar1 is pLock, D0Ar2 is Mask, D0Re0 is the result
+ *                 (All other usage due to ___TBIPoll - D0Ar6, D1Re0)
+ */
+       .text
+       .balign 4
+       .global ___TBISpin
+       .type   ___TBISpin,function
+___TBISpin:
+       SETL    [A0StP++],D0FrT,D1RtP   /* Save our return address */
+       ORS     D0Re0,D0Re0,#1          /* Clear zero flag */
+       MOV     D1RtP,PC                /* Setup return address to form loop */
+$LSpinLoop:
+       BNZ     ___TBIPoll              /* Keep repeating if fail to set */
+       GETL    D0FrT,D1RtP,[--A0StP]   /* Restore return address */
+       MOV     PC,D1RtP                /* Return */
+       .size   ___TBISpin,.-___TBISpin
+
+/*
+ * Perform an attempt to gain access to a spin-lock and set some bits
+ * 
+ * Register Usage: D1Ar1 is pLock, D0Ar2 is Mask, D0Re0 is the result
+ *                 !!On return Zero flag is SET if we are successful!!
+ *                 A0.3 is used to hold base address of system event region
+ *                 D1Re0 use to hold TXMASKI while interrupts are off
+ */
+       .text
+       .balign 4
+       .global ___TBIPoll
+       .type   ___TBIPoll,function
+___TBIPoll:
+       MOV     D1Re0,#0                /* Prepare to disable ints */
+       MOVT    A0.3,#HI(LINSYSEVENT_WR_ATOMIC_LOCK)
+       SWAP    D1Re0,TXMASKI           /* Really stop ints */
+       LOCK2                           /* Gain all locks */
+       SET     [A0.3+#UON],D1RtP       /* Stop shared memory access too */
+       DCACHE  [D1Ar1],A0.3            /* Flush Cache line */
+       GETD    D0Re0,[D1Ar1]           /* Get new state from memory or hit */
+       DCACHE  [D1Ar1],A0.3            /* Flush Cache line */
+       GETD    D0Re0,[D1Ar1]           /* Get current state */
+       TST     D0Re0,D0Ar2             /* Are we clear to send? */
+       ORZ     D0Re0,D0Re0,D0Ar2       /* Yes: So set bits and */
+       SETDZ   [D1Ar1],D0Re0           /*      transmit new state */
+       SET     [A0.3+#UOFF],D1RtP      /* Allow shared memory access */
+       LOCK0                           /* Release all locks */
+       MOV     TXMASKI,D1Re0           /* Allow ints */
+$LPollEnd:
+       XORNZ   D0Re0,D0Re0,D0Re0       /* No: Generate zero result */
+       MOV     PC,D1RtP                /* Return (NZ indicates failure) */
+       .size   ___TBIPoll,.-___TBIPoll
+
+/*
+ * End of tbicore.S
+ */
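
For readers who do not speak Meta assembly, the following is a rough C rendering of the lock-word update that ___TBIPoll performs above once interrupts are masked and the hardware locks are held. It is a semantic sketch only (the function name is invented); the real routine additionally flushes the cache line and brackets the update with the atomic-lock event writes.

/* Sketch of the test-and-set step at the heart of ___TBIPoll. */
static int tbi_poll_sketch(volatile unsigned int *lock_word, unsigned int mask)
{
        unsigned int state = *lock_word;        /* GETD: read the current state */

        if (state & mask)                       /* TST: are the bits already held? */
                return 0;                       /*   yes - report failure */

        *lock_word = state | mask;              /* ORZ/SETDZ: claim the bits */
        return 1;                               /* zero flag set = success */
}
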
diff --git a/arch/metag/tbx/tbictx.S b/arch/metag/tbx/tbictx.S
new file mode 100644 (file)
index 0000000..19af983
--- /dev/null
@@ -0,0 +1,366 @@
+/*
+ * tbictx.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Explicit state save and restore routines forming part of the thread binary
+ * interface for META processors
+ */
+
+       .file   "tbictx.S"
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+#ifdef METAC_1_0
+/* Ax.4 is NOT saved in XAX3 */
+#define A0_4
+#else
+/* Ax.4 is saved in XAX4 */
+#define A0_4 A0.4,
+#endif
+
+
+/* Size of the TBICTX structure */
+#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX)
+
+/*
+ * TBIRES __TBINestInts( TBIRES State, void *pExt, int NoNestMask )
+ */
+       .text
+       .balign 4
+       .global ___TBINestInts
+       .type   ___TBINestInts,function
+___TBINestInts:
+       XOR     D0Ar4,D0Ar4,#-1                 /* D0Ar4 = ~TrigBit */
+       AND     D0Ar4,D0Ar4,#0xFFFF             /* D0Ar4 &= 0xFFFF */
+       MOV     D0Ar6,TXMASKI                   /* BGNDHALT currently enabled? */
+       TSTT    D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XCBF_BIT
+       AND     D0Ar4,D0Ar2,D0Ar4               /* D0Ar4 = Ints to allow */
+       XOR     D0Ar2,D0Ar2,D0Ar4               /* Less Ints in TrigMask */
+       BNZ     ___TBINestInts2                 /* Jump if ctx save required! */
+       TSTT    D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT  /* Is catch state dirty? */
+       OR      D0Ar4,D0Ar4,D0Ar6               /* Or in TXMASKI BGNDHALT if set */
+       TSTNZ   D0Ar4,D0Ar4                     /* Yes: AND triggers enabled */
+       MOV     D0Re0,D0Ar2                     /* Update State argument */
+       MOV     D1Re0,D1Ar1                     /*  with less Ints in TrigMask */
+       MOVZ    TXMASKI,D0Ar4                   /* Early return: Enable Ints */
+       MOVZ    PC,D1RtP                        /* Early return */
+       .size   ___TBINestInts,.-___TBINestInts
+/*
+ * Drop through into sub-function:
+ */
+       .global ___TBINestInts2
+       .type   ___TBINestInts2,function
+___TBINestInts2:
+       MOV     D0FrT,A0FrP                     /* Full entry sequence so we */
+       ADD     A0FrP,A0StP,#0                  /*     can make sub-calls */
+       MSETL   [A0StP],D0FrT,D0.5,D0.6         /*     and preserve our result */
+       ORT     D0Ar2,D0Ar2,#TBICTX_XCBF_BIT    /* Add in XCBF save request */
+       MOV     D0.5,D0Ar2                      /* Save State in DX.5 */
+       MOV     D1.5,D1Ar1
+       OR      D0.6,D0Ar4,D0Ar6                /* Save TrigMask in D0.6 */
+       MOVT    D1RtP,#HI(___TBICtxSave)        /* Save catch buffer */
+       CALL    D1RtP,#LO(___TBICtxSave)
+       MOV     TXMASKI,D0.6                    /* Allow Ints */
+       MOV     D0Re0,D0.5                      /* Return State */
+       MOV     D1Re0,D1.5
+       MGETL   D0FrT,D0.5,D0.6,[A0FrP]         /* Full exit sequence */
+       SUB     A0StP,A0FrP,#(8*3)
+       MOV     A0FrP,D0FrT
+       MOV     PC,D1RtP
+       .size   ___TBINestInts2,.-___TBINestInts2
+
+/*
+ * void *__TBICtxSave( TBIRES State, void *pExt )
+ *
+ *       D0Ar2 contains TBICTX_*_BIT values that control what
+ *          extended data is to be saved beyond the end of D1Ar1.
+ *       These bits must be ored into the SaveMask of this structure.
+ *
+ *       Virtually all possible scratch registers are used.
+ *
+ *       The D1Ar1 parameter is only used as the basis for saving
+ *       CBUF state.
+ */
+/*
+ *       If TBICTX_XEXT_BIT is specified in State, then State.pCtx->Ext is
+ *       utilised to save the base address of the context save area and
+ *       the extended states saved. The XEXT flag then indicates that the
+ *       original state of the A0.2 and A1.2 registers from TBICTX.Ext.AX2
+ *       are stored as the first part of the extended state structure.
+ */
+       .balign 4
+       .global ___TBICtxSave
+       .type   ___TBICtxSave,function
+___TBICtxSave:
+       GETD    D0Re0,[D1Ar1+#TBICTX_SaveMask-2]        /* Get SaveMask */
+       TSTT    D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+                                               /* Just XCBF to save? */
+       MOV     A0.2,D1Ar3                      /* Save pointer into A0.2 */
+       MOV     A1.2,D1RtP                      /* Free off D0FrT:D1RtP pair */
+       BZ      $LCtxSaveCBUF                   /* Yes: Only XCBF may be saved */
+       TSTT    D0Ar2,#TBICTX_XEXT_BIT          /* Extended base-state model? */
+       BZ      $LCtxSaveXDX8
+       GETL    D0Ar6,D1Ar5,[D1Ar1+#TBICTX_Ext_AX2]     /* Get A0.2, A1.2 state */
+       MOV     D0Ar4,D0Ar2                     /* Extract Ctx.SaveFlags value */
+       ANDMT   D0Ar4,D0Ar4,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+       SETD    [D1Ar1+#TBICTX_Ext_Ctx_pExt],A0.2
+       SETD    [D1Ar1+#TBICTX_Ext_Ctx_SaveMask-2],D0Ar4
+       SETL    [A0.2++],D0Ar6,D1Ar5            /* Save A0.2, A1.2 state */
+$LCtxSaveXDX8:
+       TSTT    D0Ar2,#TBICTX_XDX8_BIT          /* Save extended DX regs? */
+       BZ      $LCtxSaveXAXX
+/*
+ * Save 8 extra DX registers
+ */
+       MSETL   [A0.2],D0.8,D0.9,D0.10,D0.11,D0.12,D0.13,D0.14,D0.15
+$LCtxSaveXAXX:
+       TSTT    D0Ar2,#TBICTX_XAXX_BIT          /* Save extended AX regs? */
+       SWAP    D0Re0,A0.2                      /* pDst into D0Re0 */
+       BZ      $LCtxSaveXHL2
+/*
+ * Save 4 extra AX registers
+ */
+       MSETL   [D0Re0], A0_4 A0.5,A0.6,A0.7    /* Save 8*3 bytes */
+$LCtxSaveXHL2:
+       TSTT    D0Ar2,#TBICTX_XHL2_BIT          /* Save hardware-loop regs? */
+       SWAP    D0Re0,A0.2                      /* pDst back into A0.2 */
+       MOV     D0Ar6,TXL1START
+       MOV     D1Ar5,TXL2START
+       BZ      $LCtxSaveXTDP
+/*
+ * Save hardware loop registers
+ */
+       SETL    [A0.2++],D0Ar6,D1Ar5            /* Save 8*1 bytes */
+       MOV     D0Ar6,TXL1END
+       MOV     D1Ar5,TXL2END
+       MOV     D0FrT,TXL1COUNT
+       MOV     D1RtP,TXL2COUNT
+       MSETL   [A0.2],D0Ar6,D0FrT              /* Save 8*2 bytes */
+/*
+ * Clear loop counters to disable any current loops
+ */
+       XOR     TXL1COUNT,D0FrT,D0FrT
+       XOR     TXL2COUNT,D1RtP,D1RtP
+$LCtxSaveXTDP:
+       TSTT    D0Ar2,#TBICTX_XTDP_BIT          /* Save per-thread DSP regs? */
+       BZ      $LCtxSaveCBUF
+/*
+ * Save per-thread DSP registers; ACC.0, PR.0, PI.1-3 (PI.0 is zero)
+ */
+#ifndef CTX_NO_DSP
+D      SETL    [A0.2++],AC0.0,AC1.0            /* Save ACx.0 lower 32-bits */
+DH     SETL    [A0.2++],AC0.0,AC1.0            /* Save ACx.0 upper 32-bits */
+D      SETL    [A0.2++],D0AR.0,D1AR.0          /* Save DSP RAM registers */
+D      SETL    [A0.2++],D0AR.1,D1AR.1
+D      SETL    [A0.2++],D0AW.0,D1AW.0
+D      SETL    [A0.2++],D0AW.1,D1AW.1
+D      SETL    [A0.2++],D0BR.0,D1BR.0
+D      SETL    [A0.2++],D0BR.1,D1BR.1
+D      SETL    [A0.2++],D0BW.0,D1BW.0
+D      SETL    [A0.2++],D0BW.1,D1BW.1
+D      SETL    [A0.2++],D0ARI.0,D1ARI.0
+D      SETL    [A0.2++],D0ARI.1,D1ARI.1
+D      SETL    [A0.2++],D0AWI.0,D1AWI.0
+D      SETL    [A0.2++],D0AWI.1,D1AWI.1
+D      SETL    [A0.2++],D0BRI.0,D1BRI.0
+D      SETL    [A0.2++],D0BRI.1,D1BRI.1
+D      SETL    [A0.2++],D0BWI.0,D1BWI.0
+D      SETL    [A0.2++],D0BWI.1,D1BWI.1
+D      SETD    [A0.2++],T0
+D      SETD    [A0.2++],T1
+D      SETD    [A0.2++],T2
+D      SETD    [A0.2++],T3
+D      SETD    [A0.2++],T4
+D      SETD    [A0.2++],T5
+D      SETD    [A0.2++],T6
+D      SETD    [A0.2++],T7
+D      SETD    [A0.2++],T8
+D      SETD    [A0.2++],T9
+D      SETD    [A0.2++],TA
+D      SETD    [A0.2++],TB
+D      SETD    [A0.2++],TC
+D      SETD    [A0.2++],TD
+D      SETD    [A0.2++],TE
+D      SETD    [A0.2++],TF
+#else
+       ADD     A0.2,A0.2,#(8*18+4*16)
+#endif
+       MOV     D0Ar6,TXMRSIZE
+       MOV     D1Ar5,TXDRSIZE
+       SETL    [A0.2++],D0Ar6,D1Ar5            /* Save 8*1 bytes */
+       
+$LCtxSaveCBUF:
+#ifdef TBI_1_3
+       MOV     D0Ar4,D0Re0                     /* Copy Ctx Flags */
+       ANDT    D0Ar4,D0Ar4,#TBICTX_XCBF_BIT    /*   mask XCBF if already set */
+       XOR     D0Ar4,D0Ar4,#-1
+       AND     D0Ar2,D0Ar2,D0Ar4               /*   remove XCBF if already set */
+#endif
+       TSTT    D0Ar2,#TBICTX_XCBF_BIT          /* Want to save CBUF? */
+       ANDT    D0Ar2,D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+       OR      D0Ar2,D0Ar2,D0Re0               /* Generate new SaveMask */
+       SETD    [D1Ar1+#TBICTX_SaveMask-2],D0Ar2/* Add in bits saved to TBICTX */
+       MOV     D0Re0,A0.2                      /* Return end of save area */
+       MOV     D0Ar4,TXDIVTIME                 /* Get TXDIVTIME */
+       MOVZ    PC,A1.2                         /* No: Early return */
+       TSTT    D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT  /* Need to save CBUF? */
+       MOVZ    PC,A1.2                         /* No: Early return */
+       ORT     D0Ar2,D0Ar2,#TBICTX_XCBF_BIT
+       SETD    [D1Ar1+#TBICTX_SaveMask-2],D0Ar2/* Add in XCBF bit to TBICTX */
+       ADD     A0.2,D1Ar1,#TBICTX_BYTES        /* Dump CBUF state after TBICTX */
+/*
+ * Save CBUF
+ */
+       SETD    [A0.2+# 0],TXCATCH0             /* Save TXCATCHn */
+       SETD    [A0.2+# 4],TXCATCH1
+       TSTT    D0Ar2,#TBICTX_CBRP_BIT          /* ... RDDIRTY was/is set */
+       SETD    [A0.2+# 8],TXCATCH2
+       SETD    [A0.2+#12],TXCATCH3
+       BZ      $LCtxSaveComplete
+       SETL    [A0.2+#(2*8)],RD                /* Save read pipeline */
+       SETL    [A0.2+#(3*8)],RD                /* Save read pipeline */
+       SETL    [A0.2+#(4*8)],RD                /* Save read pipeline */
+       SETL    [A0.2+#(5*8)],RD                /* Save read pipeline */
+       SETL    [A0.2+#(6*8)],RD                /* Save read pipeline */
+       SETL    [A0.2+#(7*8)],RD                /* Save read pipeline */
+       AND     TXDIVTIME,D0Ar4,#TXDIVTIME_DIV_BITS /* Clear RPDIRTY */
+$LCtxSaveComplete:
+       MOV     PC,A1.2                         /* Return */
+       .size   ___TBICtxSave,.-___TBICtxSave
+
+/*
+ * void *__TBICtxRestore( TBIRES State, void *pExt )
+ *
+ *                 D0Ar2 contains TBICTX_*_BIT values that control what
+ *                    extended data is to be recovered from D1Ar3 (pExt).
+ *
+ *                 Virtually all possible scratch registers are used.
+ */
+/*
+ *     If TBICTX_XEXT_BIT is specified in State, then the saved state of
+ *       the original A0.2 and A1.2 is restored from pExt and the
+ *       XEXT-related flags are removed from State.pCtx->SaveMask.
+ *
+ */
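+/*
+ * A hypothetical C-level pairing of the save and restore routines, based
+ * only on the prototypes documented in this file (the buffer name and its
+ * size macro are illustrative assumptions, not part of the TBX API):
+ *
+ *     static char ExtBuf[EXT_STATE_BYTES];   // big enough for the optional
+ *                                            // TBICTX_*_BIT areas selected
+ *     void *pEnd = __TBICtxSave( State, ExtBuf );  // returns end of data
+ *     ...
+ *     __TBICtxRestore( State, ExtBuf );      // recover the saved state
+ */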
+       .balign 4
+       .global ___TBICtxRestore
+       .type   ___TBICtxRestore,function
+___TBICtxRestore:
+       GETD    D0Ar6,[D1Ar1+#TBICTX_CurrMODE]  /* Get TXMODE Value */
+       ANDST   D0Ar2,D0Ar2,#TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT
+       MOV     D1Re0,D0Ar2                     /* Keep flags in D1Re0 */
+       MOV     D0Re0,D1Ar3                     /* D1Ar3 is default result */
+       MOVZ    PC,D1RtP                        /* Early return, nothing to do */
+       ANDT    D0Ar6,D0Ar6,#0xE000             /* Top bits of TXMODE required */
+       MOV     A0.3,D0Ar6                      /* Save TXMODE for later */
+       TSTT    D1Re0,#TBICTX_XEXT_BIT          /* Check for XEXT bit */
+       BZ      $LCtxRestXDX8
+       GETD    D0Ar4,[D1Ar1+#TBICTX_SaveMask-2]/* Get current SaveMask */
+       GETL    D0Ar6,D1Ar5,[D0Re0++]           /* Restore A0.2, A1.2 state */
+       ANDMT   D0Ar4,D0Ar4,#(0xFFFF-(TBICTX_XDX8_BIT+TBICTX_XAXX_BIT+TBICTX_XHL2_BIT+TBICTX_XTDP_BIT+TBICTX_XEXT_BIT))
+       SETD    [D1Ar1+#TBICTX_SaveMask-2],D0Ar4/* New SaveMask */
+#ifdef METAC_1_0
+       SETD    [D1Ar1+#TBICTX_Ext_AX2_U0],D0Ar6
+       MOV     D0Ar6,D1Ar1
+       SETD    [D0Ar6+#TBICTX_Ext_AX2_U1],D1Ar5
+#else
+       SETL    [D1Ar1+#TBICTX_Ext_AX2],D0Ar6,D1Ar5
+#endif
+$LCtxRestXDX8:
+       TSTT    D1Re0,#TBICTX_XDX8_BIT          /* Get extended DX regs? */
+       MOV     A1.2,D1RtP                      /* Free off D1RtP register */
+       BZ      $LCtxRestXAXX
+/*
+ * Restore 8 extra DX registers
+ */
+       MGETL   D0.8,D0.9,D0.10,D0.11,D0.12,D0.13,D0.14,D0.15,[D0Re0]
+$LCtxRestXAXX:
+       TSTT    D1Re0,#TBICTX_XAXX_BIT          /* Get extended AX regs? */
+       BZ      $LCtxRestXHL2
+/*
+ * Restore 3 extra AX registers
+ */
+       MGETL   A0_4 A0.5,A0.6,A0.7,[D0Re0]     /* Get 8*3 bytes */
+$LCtxRestXHL2:
+       TSTT    D1Re0,#TBICTX_XHL2_BIT          /* Get hardware-loop regs? */
+       BZ      $LCtxRestXTDP
+/*
+ * Get hardware loop registers
+ */
+       MGETL   D0Ar6,D0Ar4,D0Ar2,[D0Re0]       /* Get 8*3 bytes */
+       MOV     TXL1START,D0Ar6
+       MOV     TXL2START,D1Ar5
+       MOV     TXL1END,D0Ar4
+       MOV     TXL2END,D1Ar3
+       MOV     TXL1COUNT,D0Ar2
+       MOV     TXL2COUNT,D1Ar1
+$LCtxRestXTDP:
+       TSTT    D1Re0,#TBICTX_XTDP_BIT          /* Get per-thread DSP regs? */
+       MOVZ    PC,A1.2                         /* No: Early return */
+/*
+ * Get per-thread DSP registers; ACC.0, PR.0, PI.1-3 (PI.0 is zero)
+ */
+       MOV     A0.2,D0Re0
+       GETL    D0Ar6,D1Ar5,[D0Re0++#((16*4)+(18*8))]
+#ifndef CTX_NO_DSP
+D      GETL    AC0.0,AC1.0,[A0.2++]            /* Restore ACx.0 lower 32-bits */
+DH     GETL    AC0.0,AC1.0,[A0.2++]            /* Restore ACx.0 upper 32-bits */
+#else
+       ADD     A0.2,A0.2,#(2*8)
+#endif
+       ADD     D0Re0,D0Re0,#(2*4)
+       MOV     TXMODE,A0.3                     /* Some TXMODE bits needed */
+       MOV     TXMRSIZE,D0Ar6
+       MOV     TXDRSIZE,D1Ar5
+#ifndef CTX_NO_DSP
+D      GETL    D0AR.0,D1AR.0,[A0.2++]          /* Restore DSP RAM registers */
+D      GETL    D0AR.1,D1AR.1,[A0.2++]
+D      GETL    D0AW.0,D1AW.0,[A0.2++]
+D      GETL    D0AW.1,D1AW.1,[A0.2++]
+D      GETL    D0BR.0,D1BR.0,[A0.2++]
+D      GETL    D0BR.1,D1BR.1,[A0.2++]
+D      GETL    D0BW.0,D1BW.0,[A0.2++]
+D      GETL    D0BW.1,D1BW.1,[A0.2++]
+#else
+       ADD     A0.2,A0.2,#(8*8)
+#endif
+       MOV     TXMODE,#0                       /* Restore TXMODE */
+#ifndef CTX_NO_DSP
+D      GETL    D0ARI.0,D1ARI.0,[A0.2++]
+D      GETL    D0ARI.1,D1ARI.1,[A0.2++]
+D      GETL    D0AWI.0,D1AWI.0,[A0.2++]
+D      GETL    D0AWI.1,D1AWI.1,[A0.2++]
+D      GETL    D0BRI.0,D1BRI.0,[A0.2++]
+D      GETL    D0BRI.1,D1BRI.1,[A0.2++]
+D      GETL    D0BWI.0,D1BWI.0,[A0.2++]
+D      GETL    D0BWI.1,D1BWI.1,[A0.2++]
+D      GETD    T0,[A0.2++]
+D      GETD    T1,[A0.2++]
+D      GETD    T2,[A0.2++]
+D      GETD    T3,[A0.2++]
+D      GETD    T4,[A0.2++]
+D      GETD    T5,[A0.2++]
+D      GETD    T6,[A0.2++]
+D      GETD    T7,[A0.2++]
+D      GETD    T8,[A0.2++]
+D      GETD    T9,[A0.2++]
+D      GETD    TA,[A0.2++]
+D      GETD    TB,[A0.2++]
+D      GETD    TC,[A0.2++]
+D      GETD    TD,[A0.2++]
+D      GETD    TE,[A0.2++]
+D      GETD    TF,[A0.2++]
+#else
+       ADD     A0.2,A0.2,#(8*8+4*16)
+#endif
+       MOV     PC,A1.2                         /* Return */
+       .size   ___TBICtxRestore,.-___TBICtxRestore
+
+/*
+ * End of tbictx.S
+ */
diff --git a/arch/metag/tbx/tbictxfpu.S b/arch/metag/tbx/tbictxfpu.S
new file mode 100644 (file)
index 0000000..e773bea
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * tbictxfpu.S
+ *
+ * Copyright (C) 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Explicit state save and restore routines forming part of the thread binary
+ * interface for META processors
+ */
+
+       .file   "tbictxfpu.S"
+
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+#ifdef TBI_1_4
+/*
+ * void *__TBICtxFPUSave( TBIRES State, void *pExt )
+ *
+ *                 D0Ar2 contains TBICTX_*_BIT values that control what
+ *                    extended data is to be saved.
+ *                 These bits must be ORed into the SaveMask of this structure.
+ *
+ *                 Virtually all possible scratch registers are used.
+ */
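+/*
+ * For reference, the layout this routine writes at pExt, derived from the
+ * code below (byte offsets given for illustration only):
+ *
+ *     +0   core configuration word (METAC_CORE_ID with FX16 presence folded in)
+ *     +4   combined TXDEFR/TXMODE FPU control bits
+ *     +8   FX.0-FX.7
+ *     then FX.8-FX.15 (only if 16 FPU registers are present)
+ *     then ACF.0-ACF.2 (only if the FPU accumulator is present)
+ */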
+       .text
+       .balign 4
+       .global ___TBICtxFPUSave
+       .type   ___TBICtxFPUSave,function
+___TBICtxFPUSave:
+
+       /* D1Ar1:D0Ar2 - State
+        * D1Ar3       - pExt
+        * D0Ar4       - Value of METAC_CORE_ID
+        * D1Ar5       - Scratch
+        * D0Ar6       - Scratch
+        */
+       
+       /* If the FPAC bit isn't set then there is nothing to do */
+       TSTT    D0Ar2,#TBICTX_FPAC_BIT
+       MOVZ    PC, D1RtP
+
+       /* Obtain the Core config */
+       MOVT    D0Ar4,        #HI(METAC_CORE_ID)
+       ADD     D0Ar4, D0Ar4, #LO(METAC_CORE_ID)
+       GETD    D0Ar4, [D0Ar4]
+
+       /* Detect FX.8 - FX.15 and add to core config */
+       MOV     D0Ar6, TXENABLE
+       AND     D0Ar6, D0Ar6, #(TXENABLE_CLASSALT_FPUR8 << TXENABLE_CLASS_S)
+       AND     D0Ar4, D0Ar4, #LO(0x0000FFFF)
+       ORT     D0Ar4, D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT)
+       XOR     D0Ar4, D0Ar4, D0Ar6
+
+       /* Save the relevant bits to the buffer */
+       SETD    [D1Ar3++], D0Ar4
+
+       /* Save the relevant bits of TXDEFR (Assumes TXDEFR is coherent) ... */
+       MOV     D0Ar6, TXDEFR
+       LSR     D0Re0, D0Ar6, #8
+       AND     D0Re0, D0Re0, #LO(TXDEFR_FPE_FE_BITS>>8)
+       AND     D0Ar6, D0Ar6, #LO(TXDEFR_FPE_ICTRL_BITS)
+       OR      D0Re0, D0Re0, D0Ar6
+
+       /* ... along with relevant bits of TXMODE to buffer */
+       MOV     D0Ar6, TXMODE
+       ANDT    D0Ar6, D0Ar6, #HI(TXMODE_FPURMODE_BITS)
+       ORT     D0Ar6, D0Ar6, #HI(TXMODE_FPURMODEWRITE_BIT)
+       OR      D0Ar6, D0Ar6, D0Re0
+       SETD    [D1Ar3++], D0Ar6
+
+       GETD    D0Ar6,[D1Ar1+#TBICTX_SaveMask-2] /* Get the current SaveMask */
+       /* D0Ar6       - pCtx->SaveMask */
+
+       TSTT    D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT) /* Perform test here for extended FPU registers
+                                                   * to avoid stalls
+                                                   */
+       /* Save the standard FPU registers */
+F      MSETL   [D1Ar3++], FX.0, FX.2, FX.4, FX.6
+
+       /* Save the extended FPU registers if they are present */
+       BZ      $Lskip_save_fx8_fx16
+F      MSETL   [D1Ar3++], FX.8, FX.10, FX.12, FX.14
+$Lskip_save_fx8_fx16:
+
+       /* Save the FPU Accumulator if it is present */
+       TST     D0Ar4, #METAC_COREID_NOFPACC_BIT
+       BNZ     $Lskip_save_fpacc
+F      SETL    [D1Ar3++], ACF.0
+F      SETL    [D1Ar3++], ACF.1
+F      SETL    [D1Ar3++], ACF.2
+$Lskip_save_fpacc:
+
+       /* Update pCtx->SaveMask */
+       ANDT    D0Ar2, D0Ar2, #TBICTX_FPAC_BIT
+       OR      D0Ar6, D0Ar6, D0Ar2
+       SETD    [D1Ar1+#TBICTX_SaveMask-2],D0Ar6/* Add in XCBF bit to TBICTX */
+
+       MOV     D0Re0, D1Ar3 /* Return end of save area */
+       MOV     PC, D1RtP
+
+       .size   ___TBICtxFPUSave,.-___TBICtxFPUSave
+
+/*
+ * void *__TBICtxFPURestore( TBIRES State, void *pExt )
+ *
+ *                 D0Ar2 contains TBICTX_*_BIT values that control what
+ *                    extended data is to be recovered from D1Ar3 (pExt).
+ *
+ *                 Virtually all possible scratch registers are used.
+ */
+/*
+ * If TBICTX_XEXT_BIT is specified in State, then the saved state of
+ *       the original A0.2 and A1.2 is restored from pExt and the
+ *       XEXT-related flags are removed from State.pCtx->SaveMask.
+ *
+ */
+       .balign 4
+       .global ___TBICtxFPURestore
+       .type   ___TBICtxFPURestore,function
+___TBICtxFPURestore:
+
+       /* D1Ar1:D0Ar2 - State
+        * D1Ar3       - pExt
+        * D0Ar4       - Value of METAC_CORE_ID
+        * D1Ar5       - Scratch
+        * D0Ar6       - Scratch
+        * D1Re0       - Scratch
+        */
+
+       /* If the FPAC bit isn't set then there is nothing to do */
+       TSTT    D0Ar2,#TBICTX_FPAC_BIT
+       MOVZ    PC, D1RtP
+
+       /* Obtain the relevant bits of the Core config */
+       GETD    D0Ar4, [D1Ar3++]
+
+       /* Restore FPU related parts of TXDEFR. Assumes TXDEFR is coherent */
+       GETD    D1Ar5, [D1Ar3++]
+       MOV     D0Ar6, D1Ar5
+       LSL     D1Re0, D1Ar5, #8
+       ANDT    D1Re0, D1Re0, #HI(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS)
+       AND     D1Ar5, D1Ar5, #LO(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS)
+       OR      D1Re0, D1Re0, D1Ar5
+
+       MOV     D1Ar5, TXDEFR
+       ANDMT   D1Ar5, D1Ar5, #HI(~(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS))
+       ANDMB   D1Ar5, D1Ar5, #LO(~(TXDEFR_FPE_FE_BITS|TXDEFR_FPE_ICTRL_BITS))
+       OR      D1Re0, D1Re0, D1Ar5
+       MOV     TXDEFR, D1Re0
+
+       /* Restore relevant bits of TXMODE */
+       MOV     D1Ar5, TXMODE
+       ANDMT   D1Ar5, D1Ar5, #HI(~TXMODE_FPURMODE_BITS)
+       ANDT    D0Ar6, D0Ar6, #HI(TXMODE_FPURMODE_BITS|TXMODE_FPURMODEWRITE_BIT)
+       OR      D0Ar6, D0Ar6, D1Ar5
+       MOV     TXMODE, D0Ar6
+
+       TSTT    D0Ar4, #HI(TBICTX_CFGFPU_FX16_BIT) /* Perform test here for extended FPU registers
+                                                   * to avoid stalls
+                                                   */
+       /* Restore the standard FPU registers */
+F      MGETL   FX.0, FX.2, FX.4, FX.6, [D1Ar3++]
+
+       /* Restore the extended FPU registers if they are present */
+       BZ      $Lskip_restore_fx8_fx16
+F      MGETL   FX.8, FX.10, FX.12, FX.14, [D1Ar3++]
+$Lskip_restore_fx8_fx16:
+
+       /* Restore the FPU Accumulator if it is present */
+       TST     D0Ar4, #METAC_COREID_NOFPACC_BIT
+       BNZ     $Lskip_restore_fpacc
+F      GETL    ACF.0, [D1Ar3++]
+F      GETL    ACF.1, [D1Ar3++]
+F      GETL    ACF.2, [D1Ar3++]
+$Lskip_restore_fpacc:
+
+       MOV     D0Re0, D1Ar3 /* Return end of save area */
+       MOV     PC, D1RtP
+
+       .size   ___TBICtxFPURestore,.-___TBICtxFPURestore
+
+#endif /* TBI_1_4 */
+
+/*
+ * End of tbictxfpu.S
+ */
diff --git a/arch/metag/tbx/tbidefr.S b/arch/metag/tbx/tbidefr.S
new file mode 100644 (file)
index 0000000..3eb165e
--- /dev/null
@@ -0,0 +1,175 @@
+/*
+ * tbidefr.S
+ *
+ * Copyright (C) 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Routing deferred exceptions
+ */
+
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+       .text
+       .balign 4
+       .global ___TBIHandleDFR
+       .type   ___TBIHandleDFR,function
+/* D1Ar1:D0Ar2 -- State
+ * D0Ar3       -- SigNum
+ * D0Ar4       -- Triggers
+ * D1Ar5       -- InstOrSWSId
+ * D0Ar6       -- pTBI (volatile)
+ */
+___TBIHandleDFR:
+#ifdef META_BUG_MBN100212
+       MSETL   [A0StP++], D0FrT, D0.5
+
+       /* D1Ar1,D0Ar2,D1Ar5,D0Ar6 -- Arguments to handler, must be preserved
+        * D0Ar4       -- The deferred exceptions
+        * D1Ar3       -- As per D0Ar4 but just the trigger bits
+        * D0.5        -- The bgnd deferred exceptions
+        * D1.5        -- TXDEFR with bgnd re-added
+        */
+
+       /* - Collect the pending deferred exceptions using TXSTAT,
+        *   (ack's the bgnd exceptions as a side-effect)
+        * - Manually collect remaining (interrupt) deferred exceptions
+        *   using TXDEFR
+        * - Replace the triggers (from TXSTATI) with the int deferred
+        *   exceptions DEFR ..., TXSTATI would have returned if it was valid
+        *   from bgnd code
+        * - Reconstruct TXDEFR by or'ing bgnd deferred exceptions (except
+        *   the DEFER bit) and the int deferred exceptions. This will be
+        *   restored later
+        */
+       DEFR    D0.5,  TXSTAT
+       MOV     D1.5,  TXDEFR
+       ANDT    D0.5,  D0.5, #HI(0xFFFF0000)
+       MOV     D1Ar3, D1.5
+       ANDT    D1Ar3, D1Ar3, #HI(0xFFFF0000)
+       OR      D0Ar4, D1Ar3, #TXSTAT_DEFER_BIT
+       OR      D1.5, D1.5, D0.5
+
+       /* Mask off anything unrelated to the deferred exception triggers */
+       ANDT    D1Ar3, D1Ar3, #HI(TXSTAT_BUSERR_BIT | TXSTAT_FPE_BITS)
+
+       /* Can assume that at least one exception happened since this
+        * handler wouldn't have been called otherwise.
+        * 
+        * Replace the signal number and at the same time, prepare
+        * the mask to acknowledge the exception
+        *
+        * D1Re0 -- The bits to acknowledge
+        * D1Ar3 -- The signal number
+        * D1RtP -- Scratch to deal with non-conditional insns
+        */
+       MOVT    D1Re0, #HI(TXSTAT_FPE_BITS & ~TXSTAT_FPE_DENORMAL_BIT)
+       MOV     D1RtP, #TXSTAT_FPE_INVALID_S
+       FFB     D1Ar3, D1Ar3
+       CMP     D1Ar3, #TXSTAT_FPE_INVALID_S
+       MOVLE   D1Ar3, D1RtP /* Collapse FPE triggers to a single signal */
+       MOV     D1RtP, #1
+       LSLGT   D1Re0, D1RtP, D1Ar3
+
+       /* Get the handler using the signal number
+        *
+        * D1Ar3 -- The signal number
+        * D0Re0 -- Offset into TBI struct containing handler address
+        * D1Re0 -- Mask of triggers to keep
+        * D1RtP -- Address of handler
+        */
+       SUB     D1Ar3, D1Ar3, #(TXSTAT_FPE_INVALID_S - TBID_SIGNUM_FPE)
+       LSL     D0Re0, D1Ar3, #2
+       XOR     D1Re0, D1Re0, #-1   /* Prepare mask for acknowledge (avoids stall) */
+       ADD     D0Re0,D0Re0,#TBI_fnSigs
+       GETD    D1RtP, [D0Ar6+D0Re0]
+
+       /* Acknowledge triggers */
+       AND     D1.5, D1.5, D1Re0
+
+       /* Restore remaining exceptions
+        * Do this here in case the handler enables nested interrupts
+        *
+        * D1.5 -- TXDEFR with this exception ack'd
+        */
+       MOV     TXDEFR, D1.5
+
+       /* Call the handler */
+       SWAP    D1RtP, PC
+
+       GETL    D0.5,  D1.5,  [--A0StP]
+       GETL    D0FrT, D1RtP, [--A0StP]
+       MOV     PC,D1RtP
+#else  /* META_BUG_MBN100212 */
+
+       /* D1Ar1,D0Ar2,D1Ar5,D0Ar6 -- Arguments to handler, must be preserved
+        * D0Ar4       -- The deferred exceptions
+        * D1Ar3       -- As per D0Ar4 but just the trigger bits
+        */
+
+       /* - Collect the pending deferred exceptions using TXSTAT,
+        *   (ack's the interrupt exceptions as a side-effect)
+        */
+       DEFR    D0Ar4, TXSTATI
+
+       /* Mask off anything unrelated to the deferred exception triggers */
+       MOV     D1Ar3, D0Ar4
+       ANDT    D1Ar3, D1Ar3, #HI(TXSTAT_BUSERR_BIT | TXSTAT_FPE_BITS)
+
+       /* Can assume that at least one exception happened since this
+        * handler wouldn't have been called otherwise.
+        * 
+        * Replace the signal number and at the same time, prepare
+        * the mask to acknowledge the exception
+        *
+        * The unusual code for 1<<D1Ar3 may need explanation.
+        * Normally this would be done using 'MOV rs,#1' and 'LSL rd,rs,D1Ar3'
+        * but only D1Re0 is available in D1 and no cross-unit insns are available.
+        * Even worse, there is no conditional 'MOV r,#uimm8'.
+        * Since the CMP proves that D1Ar3 >= 20, we can reuse the bottom 12 bits
+        * of D1Re0 (using 'ORGT r,#1') in the knowledge that the top 20 bits will
+        * be discarded without affecting the result.
+        *
+        * D1Re0 -- The bits to acknowledge
+        * D1Ar3 -- The signal number
+        */
+       MOVT    D1Re0, #HI(TXSTAT_FPE_BITS & ~TXSTAT_FPE_DENORMAL_BIT)
+       MOV     D0Re0, #TXSTAT_FPE_INVALID_S
+       FFB     D1Ar3, D1Ar3
+       CMP     D1Ar3, #TXSTAT_FPE_INVALID_S
+       MOVLE   D1Ar3, D0Re0 /* Collapse FPE triggers to a single signal */
+       ORGT    D1Re0, D1Re0, #1
+       LSLGT   D1Re0, D1Re0, D1Ar3
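+       /* Worked example of the trick above (purely illustrative value):
+        * suppose D1Ar3 = 22 after the CMP. D1Re0 still only holds the MOVT
+        * bits (31..16), so ORGT sets bit 0 and LSLGT by 22 moves it up to
+        * bit 22 while every MOVT bit falls off the top of the 32-bit
+        * register, leaving exactly 1<<22 as the acknowledge mask.
+        */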
+
+       SUB     D1Ar3, D1Ar3, #(TXSTAT_FPE_INVALID_S - TBID_SIGNUM_FPE)
+
+       /* Acknowledge triggers and restore remaining exceptions
+        * Do this here in case the handler enables nested interrupts
+        *
+        * (x | y) ^ y == x & ~y. It avoids the restrictive XOR ...,#-1 insn
+        * and is the same length
+        */
+       MOV     D0Re0, TXDEFR
+       OR      D0Re0, D0Re0, D1Re0
+       XOR     TXDEFR, D0Re0, D1Re0
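+       /* Worked example of the identity (illustrative values only): with
+        * x = 0b1010 and y = 0b0110, (x | y) ^ y = 0b1110 ^ 0b0110 = 0b1000,
+        * which is x & ~y, so the OR/XOR pair clears exactly the acknowledged
+        * trigger bits from TXDEFR.
+        */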
+
+       /* Get the handler using the signal number
+        *
+        * D1Ar3 -- The signal number
+        * D0Re0 -- Address of handler
+        */
+       LSL     D0Re0, D1Ar3, #2
+       ADD     D0Re0,D0Re0,#TBI_fnSigs
+       GETD    D0Re0, [D0Ar6+D0Re0]
+
+       /* Tailcall the handler */
+       MOV     PC,D0Re0
+
+#endif /* META_BUG_MBN100212 */
+       .size   ___TBIHandleDFR,.-___TBIHandleDFR
+/*
+ * End of tbidefr.S
+ */
diff --git a/arch/metag/tbx/tbidspram.S b/arch/metag/tbx/tbidspram.S
new file mode 100644 (file)
index 0000000..2f27c03
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ * tbidspram.S
+ *
+ * Copyright (C) 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Explicit state save and restore routines forming part of the thread binary
+ * interface for META processors
+ */
+
+       .file   "tbidspram.S"
+
+/* These aren't generally useful to a user so, for now, they aren't publicly available */
+#define _TBIECH_DSPRAM_DUA_S    8
+#define _TBIECH_DSPRAM_DUA_BITS 0x7f00
+#define _TBIECH_DSPRAM_DUB_S    0
+#define _TBIECH_DSPRAM_DUB_BITS 0x007f
+
+/*
+ * void *__TBIDspramSaveA( short DspramSizes, void *pExt )
+ */
+       .text
+       .balign 4
+       .global ___TBIDspramSaveA
+       .type   ___TBIDspramSaveA,function
+___TBIDspramSaveA:
+
+       SETL    [A0StP++], D0.5, D1.5
+       MOV     A0.3, D0Ar2
+
+       /* D1Ar1 - Dspram Sizes
+        * A0.3  - Pointer to buffer
+        */
+
+       /* Save the specified amount of dspram DUA */
+DL     MOV     D0AR.0, #0
+       LSR     D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUA_S
+       AND     D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUA_BITS >> _TBIECH_DSPRAM_DUA_S)
+       SUB     TXRPT, D1Ar1, #1
+$L1:
+DL     MOV     D0Re0, [D0AR.0++]
+DL     MOV     D0Ar6, [D0AR.0++]
+DL     MOV     D0Ar4, [D0AR.0++]
+DL     MOV     D0.5,  [D0AR.0++]
+       MSETL   [A0.3++], D0Re0, D0Ar6, D0Ar4, D0.5
+
+       BR      $L1
+
+       GETL    D0.5, D1.5, [--A0StP]
+       MOV     PC, D1RtP
+
+       .size   ___TBIDspramSaveA,.-___TBIDspramSaveA
+
+/*
+ * void *__TBIDspramSaveB( short DspramSizes, void *pExt )
+ */
+       .balign 4
+       .global ___TBIDspramSaveB
+       .type   ___TBIDspramSaveB,function
+___TBIDspramSaveB:
+
+       SETL    [A0StP++], D0.5, D1.5
+       MOV     A0.3, D0Ar2
+
+       /* D1Ar1 - Dspram Sizes
+        * A0.3  - Pointer to buffer
+        */
+
+       /* Save the specified amount of dspram DUB */
+DL     MOV     D0BR.0, #0
+       LSR     D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUB_S
+       AND     D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUB_BITS >> _TBIECH_DSPRAM_DUB_S)
+       SUB     TXRPT, D1Ar1, #1
+$L2:
+DL     MOV     D0Re0, [D0BR.0++]
+DL     MOV     D0Ar6, [D0BR.0++]
+DL     MOV     D0Ar4, [D0BR.0++]
+DL     MOV     D0.5,  [D0BR.0++]
+       MSETL   [A0.3++], D0Re0, D0Ar6, D0Ar4, D0.5
+
+       BR      $L2
+
+       GETL    D0.5, D1.5, [--A0StP]
+       MOV     PC, D1RtP
+
+       .size   ___TBIDspramSaveB,.-___TBIDspramSaveB
+
+/*
+ * void *__TBIDspramRestoreA( short DspramSizes, void *pExt )
+ */
+       .balign 4
+       .global ___TBIDspramRestoreA
+       .type   ___TBIDspramRestoreA,function
+___TBIDspramRestoreA:
+
+       SETL    [A0StP++], D0.5, D1.5
+       MOV     A0.3, D0Ar2
+
+       /* D1Ar1 - Dspram Sizes
+        * A0.3 - Pointer to buffer
+        */
+
+       /* Restore the specified amount of dspram DUA */
+DL     MOV     D0AW.0, #0
+       LSR     D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUA_S
+       AND     D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUA_BITS >> _TBIECH_DSPRAM_DUA_S)
+       SUB     TXRPT, D1Ar1, #1
+$L3:
+       MGETL   D0Re0, D0Ar6, D0Ar4, D0.5, [A0.3++]
+DL     MOV     [D0AW.0++], D0Re0
+DL     MOV     [D0AW.0++], D0Ar6
+DL     MOV     [D0AW.0++], D0Ar4
+DL     MOV     [D0AW.0++], D0.5
+
+       BR      $L3
+
+       GETL    D0.5, D1.5, [--A0StP]
+       MOV     PC, D1RtP
+
+       .size   ___TBIDspramRestoreA,.-___TBIDspramRestoreA
+
+/*
+ * void *__TBIDspramRestoreB( short DspramSizes, void *pExt )
+ */
+       .balign 4
+       .global ___TBIDspramRestoreB
+       .type   ___TBIDspramRestoreB,function
+___TBIDspramRestoreB:
+
+       SETL    [A0StP++], D0.5, D1.5
+       MOV     A0.3, D0Ar2
+
+       /* D1Ar1 - Dspram Sizes
+        * A0.3 - Pointer to buffer
+        */
+
+       /* Restore the specified amount of dspram DUB */
+DL     MOV     D0BW.0, #0
+       LSR     D1Ar1, D1Ar1, #_TBIECH_DSPRAM_DUB_S
+       AND     D1Ar1, D1Ar1, #(_TBIECH_DSPRAM_DUB_BITS >> _TBIECH_DSPRAM_DUB_S)
+       SUB     TXRPT, D1Ar1, #1
+$L4:
+       MGETL   D0Re0, D0Ar6, D0Ar4, D0.5, [A0.3++]
+DL     MOV     [D0BW.0++], D0Re0
+DL     MOV     [D0BW.0++], D0Ar6
+DL     MOV     [D0BW.0++], D0Ar4
+DL     MOV     [D0BW.0++], D0.5
+
+       BR      $L4
+
+       GETL    D0.5, D1.5, [--A0StP]
+       MOV     PC, D1RtP
+
+       .size   ___TBIDspramRestoreB,.-___TBIDspramRestoreB
+
+/*
+ * End of tbidspram.S
+ */
diff --git a/arch/metag/tbx/tbilogf.S b/arch/metag/tbx/tbilogf.S
new file mode 100644 (file)
index 0000000..4a34d80
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * tbilogf.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Defines the __TBILogF trap code for debugging messages and __TBICont, on
+ * which debug asserts are implemented.
+ */
+
+       .file   "tbilogf.S"
+
+/*
+ * Perform console printf using external debugger or host support
+ */
+       .text
+       .balign 4
+       .global ___TBILogF
+       .type   ___TBILogF,function
+___TBILogF:
+       MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2
+       SWITCH  #0xC10020
+       MOV     D0Re0,#0
+       SUB     A0StP,A0StP,#24
+       MOV     PC,D1RtP
+       .size   ___TBILogF,.-___TBILogF
+
+/*
+ * Perform wait for continue under control of the debugger
+ */
+       .text
+       .balign 4
+       .global ___TBICont
+       .type   ___TBICont,function
+___TBICont:
+       MOV     D0Ar6,#1
+       MSETL   [A0StP],D0Ar6,D0Ar4,D0Ar2
+       SWITCH  #0xC30006       /* Returns if we are to continue */
+       SUB     A0StP,A0StP,#(8*3)
+       MOV     PC,D1RtP        /* Return */
+       .size   ___TBICont,.-___TBICont
+
+/*
+ * End of tbilogf.S
+ */
diff --git a/arch/metag/tbx/tbipcx.S b/arch/metag/tbx/tbipcx.S
new file mode 100644 (file)
index 0000000..de0626f
--- /dev/null
@@ -0,0 +1,451 @@
+/*
+ * tbipcx.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2009, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Asynchronous trigger handling including exceptions
+ */
+
+       .file   "tbipcx.S"
+#include <asm/metag_regs.h>
+#include <asm/tbx.h>
+
+/* BEGIN HACK */
+/* define these for now while doing initial conversion to GAS;
+   will fix properly later */
+
+/* Signal identifiers always have the TBID_SIGNAL_BIT set and contain the
+   following related bit-fields */
+#define TBID_SIGNUM_S       2
+
+/* END HACK */
+
+#ifdef METAC_1_0
+/* Ax.4 is saved in TBICTX */
+#define A0_4  ,A0.4
+#else
+/* Ax.4 is NOT saved in TBICTX */
+#define A0_4
+#endif
+
+/* Size of the TBICTX structure */
+#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX)
+
+#ifdef METAC_1_1
+#ifndef BOOTROM
+#ifndef SPECIAL_BUILD
+/* Jump straight into the boot ROM version of this code */
+#define CODE_USES_BOOTROM
+#endif
+#endif
+#endif
+
+/* Define space needed for CATCH buffer state in traditional units */
+#define CATCH_ENTRIES      5
+#define CATCH_ENTRY_BYTES 16
+
+#ifndef CODE_USES_BOOTROM
+#define A0GblIStP      A0.15  /* PTBICTX for current thread in PRIV system */
+#define A1GblIGbP      A1.15  /* Interrupt A1GbP value in PRIV system */
+#endif
+
+/*
+ * TBIRES __TBIASyncTrigger( TBIRES State )
+ */
+       .text
+       .balign 4
+       .global ___TBIASyncTrigger
+       .type   ___TBIASyncTrigger,function
+___TBIASyncTrigger:
+#ifdef CODE_USES_BOOTROM
+       MOVT    D0Re0,#HI(LINCORE_BASE)
+       JUMP    D0Re0,#0xA0
+#else
+       MOV     D0FrT,A0FrP                     /* Boing entry sequence */
+       ADD     A0FrP,A0StP,#0
+       SETL    [A0StP++],D0FrT,D1RtP
+       MOV     D0Re0,PCX                       /* Check for repeat call */
+       MOVT    D0FrT,#HI(___TBIBoingRTI+4)
+       ADD     D0FrT,D0FrT,#LO(___TBIBoingRTI+4)
+       CMP     D0Re0,D0FrT
+       BEQ     ___TBIBoingExit                 /* Already set up - come out */
+       ADD     D1Ar1,D1Ar1,#7                  /* PRIV system stack here */
+       MOV     A0.2,A0StP                      /*  else push context here */
+       MOVS    D0Re0,D0Ar2                     /* Return in user mode? */
+       ANDMB   D1Ar1,D1Ar1,#0xfff8             /*  align priv stack to 64-bit */
+       MOV     D1Re0,D1Ar1                     /*   and set result to arg */
+       MOVMI   A0.2,D1Ar1                      /*  use priv stack if PRIV set                   */
+/*
+ * Generate an initial TBICTX to return to our own current call context
+ */
+       MOVT    D1Ar5,#HI(___TBIBoingExit)      /* Go here to return */
+       ADD     D1Ar5,D1Ar5,#LO(___TBIBoingExit)
+       ADD     A0.3,A0.2,#TBICTX_DX            /* DX Save area */
+       ANDT    D0Ar2,D0Ar2,#TBICTX_PRIV_BIT    /* Extract PRIV bit */
+       MOVT    D0Ar6,#TBICTX_SOFT_BIT          /* Only soft thread state */
+       ADD     D0Ar6,D0Ar6,D0Ar2               /* Add in PRIV bit if requested */
+       SETL    [A0.2],D0Ar6,D1Ar5              /* Push header fields */
+       ADD     D0FrT,A0.2,#TBICTX_AX           /* Address AX save area */
+       MSETL   [A0.3],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+       MOV     D0Ar6,#0
+       MOV     D1Ar5,#0
+       SETL    [A0.3++],D0Ar6,D1Ar5            /* Zero CT register states */
+       SETL    [A0.3++],D0Ar6,D1Ar5
+       MSETL   [D0FrT],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX regs */
+       MOV     A0FrP,A0.2                      /* Restore me! */
+       B       ___TBIResume
+       .size   ___TBIASyncTrigger,.-___TBIASyncTrigger
+
+/*
+ * Optimised return to handler for META Core
+ */
+___TBIBoingRTH:
+       RTH                                     /* Go to background level */
+       MOVT    A0.2,     #HI($Lpcx_target)
+       ADD     A0.2,A0.2,#LO($Lpcx_target)
+       MOV     PCX,A0.2                        /* Setup PCX for interrupts */
+       MOV     PC,D1Re0                        /* Jump to handler */
+/*
+ * This is where the code below needs to jump to wait for outermost interrupt
+ * event in a non-privilege mode system (single shared interrupt stack).
+ */
+___TBIBoingPCX:
+       MGETL   A0StP,A0FrP,A0.2,A0.3 A0_4,[D1Re0] /* Restore AX regs */
+       MOV     TXSTATUS,D0Re0                  /* Restore flags */
+       GETL    D0Re0,D1Re0,[D1Re0+#TBICTX_DX-TBICTX_BYTES]
+___TBIBoingRTI:
+       RTI                                     /* Wait for interrupt */
+$Lpcx_target:
+/*
+ * Save initial interrupt state on current stack
+ */
+       SETL    [A0StP+#TBICTX_DX],D0Re0,D1Re0  /* Save key registers */
+       ADD     D1Re0,A0StP,#TBICTX_AX          /* Address AX save area */
+       MOV     D0Re0,TXSTATUS                  /* Read TXSTATUS into D0Re0 */
+       MOV     TXSTATUS,#0                     /* Clear TXSTATUS */
+       MSETL   [D1Re0],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX critical regs */
+/*
+ * Register state at this point is-
+ *
+ *     D0Re0 - Old TXSTATUS with PRIV and CBUF bits set if appropriate
+ *     A0StP - Is call stack frame and base of TBICTX being generated
+ *     A1GbP - Is valid static access link
+ */
+___TBIBoing:
+       LOCK0                                   /* Make sure we have no locks! */
+       ADD     A1.2,A0StP,#TBICTX_DX+(8*1)     /* Address DX.1 save area */
+       MOV     A0FrP,A0StP                     /* Setup frame pointer */
+       MSETL   [A1.2],D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+       MOV     D0Ar4,TXRPT                     /* Save critical CT regs */
+       MOV     D1Ar3,TXBPOBITS
+       MOV     D1Ar1,TXDIVTIME                 /* Calc catch buffer pSrc */
+       MOV     D0Ar2,TXMODE
+       MOV     TXMODE,#0                       /* Clear TXMODE */
+#ifdef TXDIVTIME_RPDIRTY_BIT
+       TSTT    D1Ar1,#HI(TXDIVTIME_RPDIRTY_BIT)/* NZ = RPDIRTY */
+       MOVT    D0Ar6,#TBICTX_CBRP_BIT
+       ORNZ    D0Re0,D0Re0,D0Ar6               /* Set CBRP if RPDIRTY set */
+#endif
+       MSETL   [A1.2],D0Ar4,D0Ar2              /* Save CT regs state */
+       MOV     D0Ar2,D0Re0                     /* Copy TXSTATUS */
+       ANDMT   D0Ar2,D0Ar2,#TBICTX_CBUF_BIT+TBICTX_CBRP_BIT
+#ifdef TBI_1_4
+       MOVT    D1Ar1,#TBICTX_FPAC_BIT          /* Copy FPActive into FPAC */
+       TSTT    D0Re0,#HI(TXSTATUS_FPACTIVE_BIT)
+       ORNZ    D0Ar2,D0Ar2,D1Ar1
+#endif
+       MOV     D1Ar1,PCX                       /* Read CurrPC */
+       ORT     D0Ar2,D0Ar2,#TBICTX_CRIT_BIT    /* SaveMask + CRIT bit */
+       SETL    [A0FrP+#TBICTX_Flags],D0Ar2,D1Ar1 /* Set pCtx header fields */
+/*
+ * Completed context save, now we need to make a call to an interrupt handler
+ *
+ *     D0Re0 - holds PRIV, WAIT, CBUF flags, HALT reason if appropriate
+ *     A0FrP - interrupt stack frame and base of TBICTX being generated
+ *     A0StP - same as A0FrP
+ */
+___TBIBoingWait:
+                               /* Reserve space for TBICTX and CBUF */
+       ADD     A0StP,A0StP,#TBICTX_BYTES+(CATCH_ENTRY_BYTES*CATCH_ENTRIES)
+       MOV     D0Ar4,TXSTATI                   /* Read the Triggers data */
+       MOV     D1Ar3,TXDIVTIME                 /* Read IRQEnc bits */
+       MOV     D0Ar2,D0Re0                     /* Copy PRIV and WAIT flags */
+       ANDT    D0Ar2,D0Ar2,#TBICTX_PRIV_BIT+TBICTX_WAIT_BIT+TBICTX_CBUF_BIT
+#ifdef TBI_1_4
+       MOVT    D1Ar5,#TBICTX_FPAC_BIT          /* Copy FPActive into FPAC */
+       TSTT    D0Re0,#HI(TXSTATUS_FPACTIVE_BIT)
+       ORNZ    D0Ar2,D0Ar2,D1Ar5
+#endif
+       ANDT    D1Ar3,D1Ar3,#HI(TXDIVTIME_IRQENC_BITS)
+       LSR     D1Ar3,D1Ar3,#TXDIVTIME_IRQENC_S
+       AND     TXSTATI,D0Ar4,#TXSTATI_BGNDHALT_BIT/* Ack any HALT seen */
+       ANDS    D0Ar4,D0Ar4,#0xFFFF-TXSTATI_BGNDHALT_BIT /* Only seen HALT? */
+       ORT     D0Ar2,D0Ar2,#TBICTX_CRIT_BIT    /* Set CRIT */
+#ifndef BOOTROM
+       MOVT    A1LbP,#HI(___pTBIs)
+       ADD     A1LbP,A1LbP,#LO(___pTBIs)
+       GETL    D1Ar5,D0Ar6,[A1LbP]             /* D0Ar6 = ___pTBIs[1] */
+#else
+/*
+ * For BOOTROM support ___pTBIs must be allocated at offset 0 vs A1GbP
+ */
+       GETL    D1Ar5,D0Ar6,[A1GbP]                     /* D0Ar6 = ___pTBIs[1] */
+#endif
+       BZ      ___TBIBoingHalt                 /* Yes: Service HALT */
+/*
+ * Encode interrupt as signal vector, strip away same/lower TXMASKI bits
+ */
+       MOV     D1Ar1,#1                        /* Generate mask for this bit */
+       MOV     D0Re0,TXMASKI                   /* Get interrupt mask */
+       LSL     TXSTATI,D1Ar1,D1Ar3             /* Acknowledge trigger */
+       AND     TXMASKI,D0Re0,#TXSTATI_BGNDHALT_BIT     /* Only allow HALTs */
+       OR      D0Ar2,D0Ar2,D0Re0               /* Set TBIRES.Sig.TrigMask */
+       ADD     D1Ar3,D1Ar3,#TBID_SIGNUM_TRT    /* Offset into interrupt sigs */
+       LSL     D0Re0,D1Ar3,#TBID_SIGNUM_S      /* Generate offset from SigNum */
+/*
+ * This is a key moment we are about to call the handler, register state is
+ * as follows-
+ *
+ *     D0Re0 - Handler vector (SigNum<<TBID_SIGNUM_S)
+ *     D0Ar2 - TXMASKI:TBICTX_CRIT_BIT with optional CBUF and PRIV bits
+ *     D1Ar3 - SigNum
+ *     D0Ar4 - State read from TXSTATI
+ *     D1Ar5 - Inst for SWITCH trigger case only, otherwise undefined
+ *     D0Ar6 - pTBI
+ */
+___TBIBoingVec:
+       ADD     D0Re0,D0Re0,#TBI_fnSigs         /* Offset into signal table */
+       GETD    D1Re0,[D0Ar6+D0Re0]             /* Get address for Handler */
+/*
+ * Call handler at interrupt level, when it returns simply resume execution
+ * of state indicated by D1Re0.
+ */
+       MOV     D1Ar1,A0FrP                     /* Pass in pCtx */
+       CALLR   D1RtP,___TBIBoingRTH            /* Use RTH to invoke handler */
+       
+/*
+ * Perform critical state restore and execute background thread.
+ *
+ *     A0FrP - is pointer to TBICTX structure to resume
+ *     D0Re0 - contains additional TXMASKI triggers
+ */
+       .text
+       .balign 4
+#ifdef BOOTROM
+       .global ___TBIResume
+#endif
+___TBIResume:
+/*
+ * New META IP method
+ */
+       RTH                                     /* Go to interrupt level */
+       MOV     D0Ar4,TXMASKI                   /* Read TXMASKI */
+       OR      TXMASKI,D0Ar4,D0Re0             /* -Write-Modify TXMASKI */
+       GETL    D0Re0,D1Re0,[A0FrP+#TBICTX_Flags]/* Get Flags:SaveMask, CurrPC */
+       MOV     A0StP,A0FrP                     /* Position stack pointer */
+       MOV     D0Ar2,TXPOLLI                   /* Read pending triggers */
+       MOV     PCX,D1Re0                       /* Set resumption PC */
+       TST     D0Ar2,#0xFFFF                   /* Any pending triggers? */
+       BNZ     ___TBIBoingWait                 /* Yes: Go for triggers */
+       TSTT    D0Re0,#TBICTX_WAIT_BIT          /* Do we WAIT anyway? */
+       BNZ     ___TBIBoingWait                 /* Yes: Go for triggers */
+       LSLS    D1Ar5,D0Re0,#1                  /* Test XCBF (MI) & PRIV (CS)? */
+       ADD     D1Re0,A0FrP,#TBICTX_CurrRPT     /* Address CT save area */
+       ADD     A0StP,A0FrP,#TBICTX_DX+(8*1)    /* Address DX.1 save area */
+       MGETL   A0.2,A0.3,[D1Re0]               /* Get CT reg states */
+       MOV     D1Ar3,A1.3                      /* Copy old TXDIVTIME */
+       BPL     ___TBIResCrit                   /* No: Skip logic */
+       ADD     D0Ar4,A0FrP,#TBICTX_BYTES       /* Source is after TBICTX */
+       ANDST   D1Ar3,D1Ar3,#HI(TXDIVTIME_RPMASK_BITS)/* !Z if RPDIRTY */
+       MGETL   D0.5,D0.6,[D0Ar4]               /* Read Catch state */
+       MOV     TXCATCH0,D0.5                   /* Restore TXCATCHn */
+       MOV     TXCATCH1,D1.5
+       MOV     TXCATCH2,D0.6
+       MOV     TXCATCH3,D1.6
+       BZ      ___TBIResCrit
+       MOV     D0Ar2,#(1*8)
+       LSRS    D1Ar3,D1Ar3,#TXDIVTIME_RPMASK_S+1 /* 2nd RPMASK bit -> bit 0 */
+       ADD     RA,D0Ar4,#(0*8)                 /* Re-read read pipeline */
+       ADDNZ   RA,D0Ar4,D0Ar2                  /* If Bit 0 set issue RA */
+       LSRS    D1Ar3,D1Ar3,#2                  /* Bit 1 -> C, Bit 2 -> Bit 0 */
+       ADD     D0Ar2,D0Ar2,#8
+       ADDCS   RA,D0Ar4,D0Ar2                  /* If C issue RA */
+       ADD     D0Ar2,D0Ar2,#8
+       ADDNZ   RA,D0Ar4,D0Ar2                  /* If Bit 0 set issue RA */
+       LSRS    D1Ar3,D1Ar3,#2                  /* Bit 1 -> C, Bit 2 -> Bit 0 */
+       ADD     D0Ar2,D0Ar2,#8
+       ADDCS   RA,D0Ar4,D0Ar2                  /* If C issue RA */
+       ADD     D0Ar2,D0Ar2,#8
+       ADDNZ   RA,D0Ar4,D0Ar2                  /* If Bit 0 set issue RA */
+       MOV     TXDIVTIME,A1.3                  /* Set RPDIRTY again */
+___TBIResCrit:
+       LSLS    D1Ar5,D0Re0,#1                  /* Test XCBF (MI) & PRIV (CS)? */
+#ifdef TBI_1_4
+       ANDT    D1Ar5,D1Ar5,#(TBICTX_FPAC_BIT*2)
+       LSL     D0Ar6,D1Ar5,#3                  /* Convert FPAC into FPACTIVE */
+#endif
+       ANDMT   D0Re0,D0Re0,#TBICTX_CBUF_BIT    /* Keep CBUF bit from SaveMask */
+#ifdef TBI_1_4
+       OR      D0Re0,D0Re0,D0Ar6               /* Combine FPACTIVE with others */
+#endif
+       MGETL   D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7,[A0StP] /* Restore DX */
+       MOV     TXRPT,A0.2                      /* Restore CT regs */
+       MOV     TXBPOBITS,A1.2
+       MOV     TXMODE,A0.3
+       BCC     ___TBIBoingPCX                  /* Do non-PRIV wait! */
+       MOV     A1GblIGbP,A1GbP                 /* Save A1GbP too */
+       MGETL   A0StP,A0FrP,A0.2,A0.3 A0_4,[D1Re0] /* Restore AX regs */
+/*
+ * Wait for the first interrupt/exception trigger in a privilege mode system
+ * (interrupt stack area for current TASK to be pointed to by A0GblIStP
+ * or per_cpu__stack_save[hwthread_id]).
+ */
+       MOV     TXSTATUS,D0Re0                  /* Restore flags */
+       MOV     D0Re0,TXPRIVEXT                 /* Set TXPRIVEXT_TXTOGGLEI_BIT */
+       SUB     D1Re0,D1Re0,#TBICTX_BYTES       /* TBICTX is top of int stack */
+#ifdef TBX_PERCPU_SP_SAVE
+       SWAP    D1Ar3,A1GbP
+       MOV     D1Ar3,TXENABLE                  /* Which thread are we? */
+       AND     D1Ar3,D1Ar3,#TXENABLE_THREAD_BITS
+       LSR     D1Ar3,D1Ar3,#TXENABLE_THREAD_S-2
+       ADDT    D1Ar3,D1Ar3,#HI(_per_cpu__stack_save)
+       ADD     D1Ar3,D1Ar3,#LO(_per_cpu__stack_save)
+       SETD    [D1Ar3],D1Re0
+       SWAP    D1Ar3,A1GbP
+#else
+       MOV     A0GblIStP, D1Re0
+#endif
+       OR      D0Re0,D0Re0,#TXPRIVEXT_TXTOGGLEI_BIT
+       MOV     TXPRIVEXT,D0Re0                 /* Cannot set TXPRIVEXT if !priv */
+       GETL    D0Re0,D1Re0,[D1Re0+#TBICTX_DX]
+       RTI                                     /* Wait for interrupt */
+/*
+ * Save initial interrupt state on A0GblIStP, switch to A0GblIStP if
+ * BOOTROM code, save and switch to [A1GbP] otherwise.
+ */
+___TBIBoingPCXP:
+#ifdef TBX_PERCPU_SP_SAVE
+       SWAP    D1Ar3,A1GbP                     /* Get PRIV stack base */
+       MOV     D1Ar3,TXENABLE                  /* Which thread are we? */
+       AND     D1Ar3,D1Ar3,#TXENABLE_THREAD_BITS
+       LSR     D1Ar3,D1Ar3,#TXENABLE_THREAD_S-2
+       ADDT    D1Ar3,D1Ar3,#HI(_per_cpu__stack_save)
+       ADD     D1Ar3,D1Ar3,#LO(_per_cpu__stack_save)
+       GETD    D1Ar3,[D1Ar3]
+#else
+       SWAP    D1Ar3,A0GblIStP                 /* Get PRIV stack base */
+#endif
+       SETL    [D1Ar3+#TBICTX_DX],D0Re0,D1Re0 /* Save key registers */
+       MOV     D0Re0,TXPRIVEXT                 /* Clear TXPRIVEXT_TXTOGGLEI_BIT */
+       ADD     D1Re0,D1Ar3,#TBICTX_AX  /* Address AX save area */
+       ANDMB   D0Re0,D0Re0,#0xFFFF-TXPRIVEXT_TXTOGGLEI_BIT
+       MOV     TXPRIVEXT,D0Re0                 /* Cannot set TXPRIVEXT if !priv */
+       MOV     D0Re0,TXSTATUS                  /* Read TXSTATUS into D0Re0 */
+       MOV     TXSTATUS,#0                     /* Clear TXSTATUS */
+       MSETL   [D1Re0],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX critical regs */
+       MOV     A0StP,D1Ar3                     /* Switch stacks */
+#ifdef TBX_PERCPU_SP_SAVE
+       MOV     D1Ar3,A1GbP                     /* Get D1Ar2 back */
+#else
+       MOV     D1Ar3,A0GblIStP                 /* Get D1Ar2 back */
+#endif
+       ORT     D0Re0,D0Re0,#TBICTX_PRIV_BIT    /* Add PRIV to TXSTATUS */
+       MOV     A1GbP,A1GblIGbP                 /* Restore A1GbP */
+       B       ___TBIBoing                     /* Enter common handler code */
+/*
+ * At this point we know it's a background HALT case we are handling.
+ * The restored TXSTATUS always needs to have zero in the reason bits.
+ */
+___TBIBoingHalt:
+       MOV     D0Ar4,TXMASKI                   /* Get interrupt mask */
+       ANDST   D0Re0,D0Re0,#HI(TXSTATUS_MAJOR_HALT_BITS+TXSTATUS_MEM_FAULT_BITS)
+       AND     TXMASKI,D0Ar4,#TXSTATI_BGNDHALT_BIT /* Only allow HALTs */
+       AND     D0Ar4,D0Ar4,#0xFFFF-TXSTATI_BGNDHALT_BIT /* What ints are off? */
+       OR      D0Ar2,D0Ar2,D0Ar4               /* Set TBIRES.Sig.TrigMask */
+       MOV     D0Ar4,#TXSTATI_BGNDHALT_BIT     /* This was the trigger state */
+       LSR     D1Ar3,D0Re0,#TXSTATUS_MAJOR_HALT_S
+       MOV     D0Re0,#TBID_SIGNUM_XXF<<TBID_SIGNUM_S
+       BNZ     ___TBIBoingVec                  /* Jump to XXF exception handler */
+/*
+ * Only the SWITCH cases are left, PCX must be valid
+ */
+#ifdef TBI_1_4
+       MOV     D1Ar5,TXPRIVEXT
+       TST     D1Ar5,#TXPRIVEXT_MINIMON_BIT
+       LSR     D1Ar3,D1Ar1,#1                  /* Shift needed for MINIM paths (fill stall) */
+       BZ      $Lmeta                          /* If META only, skip */
+       TSTT    D1Ar1,#HI(0x00800000)
+       ANDMT   D1Ar3,D1Ar3,#HI(0x007FFFFF >> 1)/* Shifted mask for large MINIM */
+       ANDT    D1Ar1,D1Ar1,#HI(0xFFE00000)     /* Static mask for small MINIM */
+       BZ      $Llarge_minim                   /* If large MINIM */
+$Lsmall_minim:
+       TSTT    D1Ar3,#HI(0x00100000 >> 1)
+       ANDMT   D1Ar3,D1Ar3,#HI(0x001FFFFF >> 1)/* Correct shifted mask for large MINIM */
+       ADDZ    D1Ar1,D1Ar1,D1Ar3               /* If META rgn, add twice to undo LSR #1 */
+       B       $Lrecombine
+$Llarge_minim:
+       ANDST   D1Ar1,D1Ar1,#HI(0xFF800000)     /* Correct static mask for small MINIM */
+                                               /* Z=0 (Cannot place code at NULL) */
+$Lrecombine:
+       ADD     D1Ar1,D1Ar1,D1Ar3               /* Combine static and shifted parts */
+$Lmeta:
+       GETW    D1Ar5,[D1Ar1++]                 /* META: lo-16, MINIM: lo-16 (all-16 if short) */
+       GETW    D1Ar3,[D1Ar1]                   /* META: hi-16, MINIM: hi-16 (only if long) */
+       MOV     D1Re0,D1Ar5
+       XOR     D1Re0,D1Re0,#0x4000
+       LSLSNZ  D1Re0,D1Re0,#(32-14)            /* MINIM: If long C=0, if short C=1 */
+       LSLCC   D1Ar3,D1Ar3,#16                 /* META/MINIM long: Move hi-16 up */
+       LSLCS   D1Ar3,D1Ar5,#16                 /* MINIM short: Dup all-16 */
+       ADD     D1Ar5,D1Ar5,D1Ar3               /* ALL: Combine both 16-bit parts */
+#else
+       GETD    D1Ar5,[D1Ar1]                   /* Read instruction for switch */
+#endif
+       LSR     D1Ar3,D1Ar5,#22                 /* Convert into signal number */
+       AND     D1Ar3,D1Ar3,#TBID_SIGNUM_SW3-TBID_SIGNUM_SW0
+       LSL     D0Re0,D1Ar3,#TBID_SIGNUM_S      /* Generate offset from SigNum */
+       B       ___TBIBoingVec                  /* Jump to switch handler */
+/*
+ * Exit from TBIASyncTrigger call
+ */
+___TBIBoingExit:
+       GETL    D0FrT,D1RtP,[A0FrP++]           /* Restore state from frame */
+       SUB     A0StP,A0FrP,#8                  /* Unwind stack */
+       MOV     A0FrP,D0FrT                     /* Last memory read completes */
+       MOV     PC,D1RtP                        /* Return to caller */
+#endif /* ifdef CODE_USES_BOOTROM */
+       .size   ___TBIResume,.-___TBIResume
+
+#ifndef BOOTROM
+/*
+ * void __TBIASyncResume( TBIRES State )
+ */
+       .text
+       .balign 4
+       .global ___TBIASyncResume
+       .type   ___TBIASyncResume,function
+___TBIASyncResume:
+/*
+ * Perform CRIT|SOFT state restore and execute background thread.
+ */
+       MOV     D1Ar3,D1Ar1                     /* Restore this context */
+       MOV     D0Re0,D0Ar2                     /* Carry in additional triggers */
+                                               /* Reserve space for TBICTX */
+       ADD     D1Ar3,D1Ar3,#TBICTX_BYTES+(CATCH_ENTRY_BYTES*CATCH_ENTRIES)
+       MOV     A0StP,D1Ar3                     /* Enter with protection of */
+       MOV     A0FrP,D1Ar1                     /*   TBICTX on our stack */
+#ifdef CODE_USES_BOOTROM
+       MOVT    D1Ar1,#HI(LINCORE_BASE)
+       JUMP    D1Ar1,#0xA4
+#else
+       B       ___TBIResume
+#endif
+       .size   ___TBIASyncResume,.-___TBIASyncResume
+#endif /* ifndef BOOTROM */
+
+/*
+ * End of tbipcx.S
+ */
diff --git a/arch/metag/tbx/tbiroot.S b/arch/metag/tbx/tbiroot.S
new file mode 100644 (file)
index 0000000..7d84daf
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * tbiroot.S
+ *
+ * Copyright (C) 2001, 2002, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Module that creates, and via the ___TBI function returns, a TBI Root Block for
+ * interrupt and background processing on the current thread.
+ */
+
+       .file   "tbiroot.S"
+#include <asm/metag_regs.h>
+
+/*
+ * Get data structures and defines from the TBI C header
+ */
+#include <asm/tbx.h>
+
+
+/* If signals need to be exchanged we must create a TBI Root Block */
+
+       .data
+       .balign 8
+       .global ___pTBIs
+       .type   ___pTBIs,object
+___pTBIs:
+       .long   0 /* Bgnd+Int root block ptrs */
+       .long   0
+       .size   ___pTBIs,.-___pTBIs
+
+
+/*
+ * Return ___pTBIs value specific to execution level with promotion/demotion
+ *
+ * Register Usage: D1Ar1 is Id, D0Re0 is the primary result
+ *                 D1Re0 is secondary result (___pTBIs for other exec level)
+ */
+       .text
+       .balign 4
+       .global ___TBI
+       .type   ___TBI,function
+___TBI:
+       TSTT    D1Ar1,#HI(TBID_ISTAT_BIT)       /* Bgnd or Int level? */
+       MOVT    A1LbP,#HI(___pTBIs)
+       ADD     A1LbP,A1LbP,#LO(___pTBIs)
+       GETL    D0Re0,D1Re0,[A1LbP] /* Base of root block table */
+       SWAPNZ  D0Re0,D1Re0                     /* Swap if asked */
+       MOV     PC,D1RtP
+       .size   ___TBI,.-___TBI
+
+
+/*
+ * Return identifier of the current thread in TBI segment or signal format with
+ * secondary mask to indicate privilege and interrupt level of thread
+ */
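+/*
+ * Illustrative example of the returned encoding, derived from the code
+ * below: if (hypothetically) thread 2 calls this from interrupt level,
+ * D0Re0 holds 2<<TBID_THREAD_S and D1Re0 has TBID_ISTAT_BIT set; from
+ * background level D1Re0 is zero.
+ */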
+       .text
+       .balign 4
+       .global ___TBIThrdPrivId
+       .type   ___TBIThrdPrivId,function
+___TBIThrdPrivId:
+       .global ___TBIThreadId
+       .type   ___TBIThreadId,function
+___TBIThreadId:
+#ifndef METAC_0_1
+       MOV     D1Re0,TXSTATUS                  /* Are we privileged or int? */
+       MOV     D0Re0,TXENABLE                  /* Which thread are we? */
+/* Disable privilege adaption for now */
+       ANDT    D1Re0,D1Re0,#HI(TXSTATUS_ISTAT_BIT) /* +TXSTATUS_PSTAT_BIT) */
+       LSL     D1Re0,D1Re0,#TBID_ISTAT_S-TXSTATUS_ISTAT_S
+       AND     D0Re0,D0Re0,#TXENABLE_THREAD_BITS
+       LSL     D0Re0,D0Re0,#TBID_THREAD_S-TXENABLE_THREAD_S
+#else
+/* Thread 0 only */
+       XOR     D0Re0,D0Re0,D0Re0
+       XOR     D1Re0,D1Re0,D1Re0
+#endif
+       MOV     PC,D1RtP                        /* Return */
+       .size   ___TBIThrdPrivId,.-___TBIThrdPrivId
+       .size   ___TBIThreadId,.-___TBIThreadId 
+
+
+/*
+ * End of tbiroot.S
+ */
diff --git a/arch/metag/tbx/tbisoft.S b/arch/metag/tbx/tbisoft.S
new file mode 100644 (file)
index 0000000..0346fe8
--- /dev/null
@@ -0,0 +1,237 @@
+/*
+ * tbisoft.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * Support for soft threads and soft context switches
+ */
+
+       .file   "tbisoft.S"
+
+#include <asm/tbx.h>
+
+#ifdef METAC_1_0
+/* Ax.4 is saved in TBICTX */
+#define A0_4  ,A0.4
+#define D0_5  ,D0.5
+#else
+/* Ax.4 is NOT saved in TBICTX */
+#define A0_4
+#define D0_5
+#endif
+
+/* Size of the TBICTX structure */
+#define TBICTX_BYTES ((TBICTX_AX_REGS*8)+TBICTX_AX)
+
+       .text
+       .balign 4
+       .global ___TBISwitchTail
+       .type   ___TBISwitchTail,function
+___TBISwitchTail:
+       B       $LSwitchTail
+       .size   ___TBISwitchTail,.-___TBISwitchTail
+
+/* 
+ * TBIRES __TBIJumpX( TBIX64 ArgsA, PTBICTX *rpSaveCtx, int TrigsMask,
+ *                                    void (*fnMain)(), void *pStack );
+ *
+ * This is a combination of __TBISwitch and __TBIJump, with the context of
+ * the calling thread being saved in the rpSaveCtx location and a drop-thru
+ *  into the __TBIJump logic. The ArgsB value passed on via the __TBIJump
+ *  logic to the routine eventually invoked will reflect the rpSaveCtx value
+ *  specified.
+ */
+       .text
+       .balign 4
+       .global ___TBIJumpX
+       .type   ___TBIJumpX,function
+___TBIJumpX:
+       CMP     D1RtP,#-1
+       B       $LSwitchStart
+       .size   ___TBIJumpX,.-___TBIJumpX
+
+/*
+ * TBIRES __TBISwitch( TBIRES Switch, PTBICTX *rpSaveCtx )
+ *
+ * Software synchronous context switch between soft threads, saving only the
+ * registers which are actually valid on call entry.
+ *
+ *     A0FrP, D0RtP, D0.5, D0.6, D0.7      - Saved on stack
+ *     A1GbP is global to all soft threads so not virtualised
+ *     A0StP is then saved as the base of the TBICTX of the thread
+ *     
+ */
+       .text
+       .balign 4
+       .global ___TBISwitch
+       .type   ___TBISwitch,function
+___TBISwitch:
+       XORS    D0Re0,D0Re0,D0Re0               /* Set ZERO flag */
+$LSwitchStart:
+       MOV     D0FrT,A0FrP                     /* Boing entry sequence */
+       ADD     A0FrP,A0StP,#0                  
+       SETL    [A0StP+#8++],D0FrT,D1RtP
+/*
+ * Save current frame state - we save all regs because we don't want
+ * uninitialised crap in the TBICTX structure that the asynchronous resumption
+ * of a thread will restore.
+ */
+       MOVT    D1Re0,#HI($LSwitchExit)         /* ASync resume point here */
+       ADD     D1Re0,D1Re0,#LO($LSwitchExit)
+       SETD    [D1Ar3],A0StP                   /* Record pCtx of this thread */
+       MOVT    D0Re0,#TBICTX_SOFT_BIT          /* Only soft thread state */
+       SETL    [A0StP++],D0Re0,D1Re0           /* Push header fields */
+       ADD     D0FrT,A0StP,#TBICTX_AX-TBICTX_DX /* Address AX save area */
+       MOV     D0Re0,#0                        /* Setup 0:0 result for ASync */
+       MOV     D1Re0,#0                        /* resume of the thread */
+       MSETL   [A0StP],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+       SETL    [A0StP++],D0Re0,D1Re0           /* Zero CurrRPT, CurrBPOBITS, */
+       SETL    [A0StP++],D0Re0,D1Re0           /* Zero CurrMODE, CurrDIVTIME */
+       ADD     A0StP,A0StP,#(TBICTX_AX_REGS*8) /* Reserve AX save space */
+       MSETL   [D0FrT],A0StP,A0FrP,A0.2,A0.3 A0_4 /* Save AX regs */
+       BNZ     ___TBIJump
+/*
+ * NextThread MUST be in TBICTX_SOFT_BIT state!
+ */
+$LSwitchTail:
+       MOV     D0Re0,D0Ar2                     /* Result from args */
+       MOV     D1Re0,D1Ar1
+       ADD     D1RtP,D1Ar1,#TBICTX_AX
+       MGETL   A0StP,A0FrP,[D1RtP]             /* Get frame values */
+$LSwitchCmn:
+       ADD     A0.2,D1Ar1,#TBICTX_DX+(8*5)
+       MGETL   D0.5,D0.6,D0.7,[A0.2]           /* Get caller-saved DX regs */
+$LSwitchExit:
+       GETL    D0FrT,D1RtP,[A0FrP++]           /* Restore state from frame */
+       SUB     A0StP,A0FrP,#8                  /* Unwind stack */
+       MOV     A0FrP,D0FrT                     /* Last memory read completes */
+       MOV     PC,D1RtP                        /* Return to caller */
+       .size   ___TBISwitch,.-___TBISwitch
+
+/*
+ * void __TBISyncResume( TBIRES State, int TrigMask );
+ *
+ * This routine causes the TBICTX structure specified in State.Sig.pCtx to
+ * be restored. This implies that execution will not return to the caller.
+ * The State.Sig.TrigMask field will be ORed into TXMASKI during the
+ * context switch such that any immediately occurring interrupts occur in
+ * the context of the newly specified task. The State.Sig.SaveMask parameter
+ * is ignored.
+ */
+       .text
+       .balign 4
+       .global ___TBISyncResume
+       .type   ___TBISyncResume,function
+___TBISyncResume:
+       MOV     D0Re0,D0Ar2                     /* Result from args */
+       MOV     D1Re0,D1Ar1
+       XOR     D1Ar5,D1Ar5,D1Ar5               /* D1Ar5 = 0 */
+       ADD     D1RtP,D1Ar1,#TBICTX_AX
+       SWAP    D1Ar5,TXMASKI                   /* D1Ar5 <-> TXMASKI */
+       MGETL   A0StP,A0FrP,[D1RtP]             /* Get frame values */
+       OR      TXMASKI,D1Ar5,D1Ar3             /* New TXMASKI */
+       B       $LSwitchCmn
+       .size   ___TBISyncResume,.-___TBISyncResume
+
+/*
+ * void __TBIJump( TBIX64 ArgsA, TBIX32 ArgsB, int TrigsMask,
+ *                               void (*fnMain)(), void *pStack );
+ *
+ * Jump directly to a new routine on an arbitrary stack with arbitrary args
+ * ORing bits back into TXMASKI en route.
+ */
+       .text
+       .balign 4
+       .global ___TBIJump
+       .type   ___TBIJump,function
+___TBIJump:
+       XOR     D0Re0,D0Re0,D0Re0               /* D0Re0 = 0 */
+       MOV     A0StP,D0Ar6                     /* Stack = Frame */
+       SWAP    D0Re0,TXMASKI                   /* D0Re0 <-> TXMASKI */
+       MOV     A0FrP,D0Ar6                     
+       MOVT    A1LbP,#HI(__exit)
+       ADD     A1LbP,A1LbP,#LO(__exit)
+       MOV     D1RtP,A1LbP                     /* D1RtP = __exit */
+       OR      TXMASKI,D0Re0,D0Ar4             /* New TXMASKI */
+       MOV     PC,D1Ar5                        /* Jump to fnMain */
+       .size   ___TBIJump,.-___TBIJump
+
+/*
+ *     PTBICTX __TBISwitchInit( void *pStack, int (*fnMain)(),
+ *                             .... 4 extra 32-bit args .... );
+ *                             
+ * Generate a new soft thread context ready for its first outing.
+ *
+ *     D1Ar1 - Region of memory to be used as the new soft thread stack
+ *     D0Ar2 - Main line routine for new soft thread
+ *     D1Ar3, D0Ar4, D1Ar5, D0Ar6 - arguments to be passed on stack
+ *     The routine returns the initial PTBICTX value for the new thread
+ */
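+/*
+ * Hypothetical usage sketch following the prototype above (the stack buffer,
+ * thread routine and argument names are invented for illustration):
+ *
+ *     PTBICTX pCtx = __TBISwitchInit( StackBuf, WorkerMain,
+ *                                     Arg1, Arg2, Arg3, Arg4 );
+ *
+ * The returned pCtx would then typically be placed in State.Sig.pCtx and
+ * handed to __TBISwitch or __TBISyncResume to give the new thread its
+ * first outing.
+ */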
+       .text
+       .balign 4
+       .global ___TBISwitchInit
+       .type   ___TBISwitchInit,function
+___TBISwitchInit:
+       MOV     D0FrT,A0FrP                     /* Need save return point */
+       ADD     A0FrP,A0StP,#0
+       SETL    [A0StP++],D0FrT,D1RtP           /* Save return to caller */
+       MOVT    A1LbP,#HI(__exit)
+       ADD     A1LbP,A1LbP,#LO(__exit)
+       MOV     D1RtP,A1LbP                     /* Get address of __exit */
+       ADD     D1Ar1,D1Ar1,#7                  /* Align stack to 64-bits */
+       ANDMB   D1Ar1,D1Ar1,#0xfff8             /*   by rounding base up */
+       MOV     A0.2,D1Ar1                      /* A0.2 is new stack */
+       MOV     D0FrT,D1Ar1                     /* Initial pseudo-frame pointer */
+       SETL    [A0.2++],D0FrT,D1RtP            /* Save return to __exit */
+       MOV     D1RtP,D0Ar2
+       SETL    [A0.2++],D0FrT,D1RtP            /* Save return to fnMain */
+       ADD     D0FrT,D0FrT,#8                  /* Advance pseudo-frame pointer */
+       MSETL   [A0.2],D0Ar6,D0Ar4              /* Save extra initial args */
+       MOVT    D1RtP,#HI(___TBIStart)          /* Start up code for new stack */
+       ADD     D1RtP,D1RtP,#LO(___TBIStart)
+       SETL    [A0.2++],D0FrT,D1RtP            /* Save return to ___TBIStart */
+       ADD     D0FrT,D0FrT,#(8*3)              /* Advance pseudo-frame pointer */
+       MOV     D0Re0,A0.2                      /* Return pCtx for new thread */
+       MOV     D1Re0,#0                        /* pCtx:0 is default Arg1:Arg2 */
+/*
+ * Generate initial TBICTX state
+ */
+       MOVT    D1Ar1,#HI($LSwitchExit)         /* Async restore code */
+       ADD     D1Ar1,D1Ar1,#LO($LSwitchExit)
+       MOVT    D0Ar2,#TBICTX_SOFT_BIT          /* Only soft thread state */
+       ADD     D0Ar6,A0.2,#TBICTX_BYTES        /* New A0StP */
+       MOV     D1Ar5,A1GbP                     /* Same A1GbP */
+       MOV     D0Ar4,D0FrT                     /* Initial A0FrP */
+       MOV     D1Ar3,A1LbP                     /* Same A1LbP */
+       SETL    [A0.2++],D0Ar2,D1Ar1            /* Set header fields */
+       MSETL   [A0.2],D0Re0,D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5,D0.6,D0.7
+       MOV     D0Ar2,#0                        /* Zero values */
+       MOV     D1Ar1,#0
+       SETL    [A0.2++],D0Ar2,D1Ar1            /* Zero CurrRPT, CurrBPOBITS, */
+       SETL    [A0.2++],D0Ar2,D1Ar1            /*      CurrMODE, and pCurrCBuf */
+       MSETL   [A0.2],D0Ar6,D0Ar4,D0Ar2,D0FrT,D0.5 /* Set DX and then AX regs */
+       B       $LSwitchExit                    /* All done! */
+       .size   ___TBISwitchInit,.-___TBISwitchInit
+
+       .text
+       .balign 4
+       .global ___TBIStart
+       .type   ___TBIStart,function
+___TBIStart:
+       MOV     D1Ar1,D1Re0                     /* Pass TBIRES args to call */
+       MOV     D0Ar2,D0Re0
+       MGETL   D0Re0,D0Ar6,D0Ar4,[A0FrP]       /* Get hidden args */
+       SUB     A0StP,A0FrP,#(8*3)              /* Entry stack pointer */
+       MOV     A0FrP,D0Re0                     /* Entry frame pointer */
+       MOVT    A1LbP,#HI(__exit)
+       ADD     A1LbP,A1LbP,#LO(__exit)
+       MOV     D1RtP,A1LbP                     /* D1RtP = __exit */
+       MOV     PC,D1Re0                        /* Jump into fnMain */
+       .size   ___TBIStart,.-___TBIStart
+
+/*
+ * End of tbisoft.S
+ */
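For readers unfamiliar with the TBX soft-thread API, the following is a minimal, hypothetical usage sketch assembled only from the prototypes and TBIRES field names documented in the comments above (__TBISwitchInit builds an initial context on a caller-supplied stack; __TBISyncResume restores a context and does not return). The stack size, the zero extra arguments and the helper names are illustrative assumptions, not part of this patch:

    #include <asm/tbx.h>

    static char soft_stack[4096];   /* size invented; base is 8-byte aligned by the routine */

    static int soft_main(void)
    {
            /* Runs on soft_stack; returning falls through to __exit */
            return 0;
    }

    static void run_soft_thread(void)
    {
            TBIRES state;

            /* Build an initial TBICTX at the base of soft_stack */
            state.Sig.pCtx = __TBISwitchInit(soft_stack, (int (*)())soft_main,
                                             0, 0, 0, 0);
            state.Sig.TrigMask = 0;         /* nothing extra to OR into TXMASKI */
            state.Sig.SaveMask = 0;         /* ignored by __TBISyncResume */

            /* Restore the new context; execution does not return here */
            __TBISyncResume(state, 0);
    }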
diff --git a/arch/metag/tbx/tbistring.c b/arch/metag/tbx/tbistring.c
new file mode 100644 (file)
index 0000000..f90cd08
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * tbistring.c
+ *
+ * Copyright (C) 2001, 2002, 2003, 2005, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * String table functions provided as part of the thread binary interface for
+ * Meta processors
+ */
+
+#include <linux/export.h>
+#include <linux/string.h>
+#include <asm/tbx.h>
+
+/*
+ * There are currently no functions to modify the string table. If these
+ * are required at some later point, I suggest having a separate module and
+ * ensuring that creating new entries does not interfere with reading old
+ * entries in any way.
+ */
+
+const TBISTR *__TBIFindStr(const TBISTR *start,
+                          const char *str, int match_len)
+{
+       const TBISTR *search = start;
+       bool exact = true;
+       const TBISEG *seg;
+
+       if (match_len < 0) {
+               /* Make match_len always positive for the inner loop */
+               match_len = -match_len;
+               exact = false;
+       } else {
+               /*
+                * Also support historic behaviour, which expected match_len to
+                * include null terminator
+                */
+               if (match_len && str[match_len-1] == '\0')
+                       match_len--;
+       }
+
+       if (!search) {
+               /* Find global string table segment */
+               seg = __TBIFindSeg(NULL, TBID_SEG(TBID_THREAD_GLOBAL,
+                                                 TBID_SEGSCOPE_GLOBAL,
+                                                 TBID_SEGTYPE_STRING));
+
+               if (!seg || seg->Bytes < sizeof(TBISTR))
+                       /* No string table! */
+                       return NULL;
+
+               /* Start of string table */
+               search = seg->pGAddr;
+       }
+
+       for (;;) {
+               while (!search->Tag)
+                       /* Allow simple gaps which are just zero initialised */
+                       search = (const TBISTR *)((const char *)search + 8);
+
+               if (search->Tag == METAG_TBI_STRE) {
+                       /* Reached the end of the table */
+                       search = NULL;
+                       break;
+               }
+
+               if ((search->Len >= match_len) &&
+                   (!exact || (search->Len == match_len + 1)) &&
+                   (search->Tag != METAG_TBI_STRG)) {
+                       /* Worth searching */
+                       if (!strncmp(str, (const char *)search->String,
+                                    match_len))
+                               break;
+               }
+
+               /* Next entry */
+               search = (const TBISTR *)((const char *)search + search->Bytes);
+       }
+
+       return search;
+}
+
+const void *__TBITransStr(const char *str, int len)
+{
+       const TBISTR *search = NULL;
+       const void *res = NULL;
+
+       for (;;) {
+               /* Search onwards */
+               search = __TBIFindStr(search, str, len);
+
+               /* No translation returns NULL */
+               if (!search)
+                       break;
+
+               /* Skip matching entries with no translation data */
+               if (search->TransLen != METAG_TBI_STRX) {
+                       /* Calculate base of translation string */
+                       res = (const char *)search->String +
+                               ((search->Len + 7) & ~7);
+                       break;
+               }
+
+               /* Next entry */
+               search = (const TBISTR *)((const char *)search + search->Bytes);
+       }
+
+       /* Return base address of translation data or NULL */
+       return res;
+}
+EXPORT_SYMBOL(__TBITransStr);
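A small, hypothetical caller of the lookup above; the key name is invented and the string table itself is provided externally, so this only illustrates the calling convention visible in __TBIFindStr (a positive length requests an exact match, a negative length a prefix match of that many characters):

    #include <linux/string.h>
    #include <asm/tbx.h>

    static const void *lookup_example(void)
    {
            const char *key = "example";

            /* Exact match on "example"; NULL means no translation found */
            return __TBITransStr(key, strlen(key));
    }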
diff --git a/arch/metag/tbx/tbitimer.S b/arch/metag/tbx/tbitimer.S
new file mode 100644 (file)
index 0000000..5dbedde
--- /dev/null
@@ -0,0 +1,207 @@
+/*
+ * tbitimer.S
+ *
+ * Copyright (C) 2001, 2002, 2007, 2012 Imagination Technologies.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation.
+ *
+ * TBI timer support routines and data values
+ */
+
+       .file   "tbitimer.S"
+/*
+ * Get data structures and defines from the main C header
+ */
+#include <asm/tbx.h>
+
+       .data
+       .balign 8
+       .global ___TBITimeB
+       .type   ___TBITimeB,object
+___TBITimeB:
+       .quad   0               /* Background 'lost' ticks */
+       .size   ___TBITimeB,.-___TBITimeB
+
+       .data
+       .balign 8
+       .global ___TBITimeI
+       .type   ___TBITimeI,object
+___TBITimeI:
+       .quad   0               /* Interrupt 'lost' ticks */
+       .size   ___TBITimeI,.-___TBITimeI
+
+       .data
+       .balign 8
+       .global ___TBITimes
+       .type   ___TBITimes,object
+___TBITimes:
+       .long   ___TBITimeB     /* Table of 'lost' tick values */
+       .long   ___TBITimeI
+       .size   ___TBITimes,.-___TBITimes
+
+/*
+ * Flag bits for control of ___TBITimeCore
+ */
+#define TIMER_SET_BIT  1
+#define TIMER_ADD_BIT  2
+
+/*
+ * Initialise or stop timer support
+ *
+ * Register Usage: D1Ar1 holds Id, D1Ar2 is initial delay or 0
+ *                 D0FrT is used to call ___TBITimeCore
+ *                 D0Re0 is used for the result which is TXSTAT_TIMER_BIT
+ *                 D0Ar4, D1Ar5, D0Ar6 are all used as scratch
+ *               Other registers are those set by ___TBITimeCore
+ *                     A0.3 is assumed to point at ___TBITime(I/B)
+ */
+       .text
+       .balign 4
+       .global ___TBITimerCtrl
+       .type   ___TBITimerCtrl,function
+___TBITimerCtrl:
+       MOV     D1Ar5,#TIMER_SET_BIT            /* Timer SET request */
+       MOVT    D0FrT,#HI(___TBITimeCore)       /* Get timer core reg values */
+       CALL    D0FrT,#LO(___TBITimeCore)       /* and perform register update */
+       NEGS    D0Ar6,D0Ar2                     /* Set flags from time-stamp */
+       ASR     D1Ar5,D0Ar6,#31                 /* Sign extend D0Ar6 into D1Ar5 */
+       SETLNZ  [A0.3],D0Ar6,D1Ar5              /* ___TBITime(B/I)=-Start if enable */
+       MOV     PC,D1RtP                        /* Return */
+       .size   ___TBITimerCtrl,.-___TBITimerCtrl
+       
+/*
+ * Return ___TBITimeStamp value
+ *
+ * Register Usage: D1Ar1 holds Id
+ *                 D0FrT is used to call ___TBITimeCore
+ *                 D0Re0, D1Re0 is used for the result
+ *                 D1Ar3, D0Ar4, D1Ar5
+ *               Other registers are those set by ___TBITimeCore
+ *                     D0Ar6 is assumed to be the timer value read
+ *                     A0.3 is assumed to point at ___TBITime(I/B)
+ */
+       .text
+       .balign 4
+       .global ___TBITimeStamp
+       .type   ___TBITimeStamp,function
+___TBITimeStamp:
+       MOV     D1Ar5,#0                        /* Timer GET request */
+       MOVT    D0FrT,#HI(___TBITimeCore)       /* Get timer core reg values */
+       CALL    D0FrT,#LO(___TBITimeCore)       /* with no register update */
+       ADDS    D0Re0,D0Ar4,D0Ar6               /* Add current time value */
+       ADD     D1Re0,D1Ar3,D1Ar5               /*  to 64-bit sign-extended time */
+       ADDCS   D1Re0,D1Re0,#1                  /* Support borrow too */
+       MOV     PC,D1RtP                        /* Return */
+       .size   ___TBITimeStamp,.-___TBITimeStamp
+
+/*
+ * Perform ___TBITimerAdd logic
+ *
+ * Register Usage: D1Ar1 holds Id, D0Ar2 holds value to be added to the timer
+ *                 D0Re0 is used for the result - new TIMER value
+ *                 D1Ar5, D0Ar6 are used as scratch
+ *               Other registers are those set by ___TBITimeCore
+ *                     D0Ar6 is assumed to be the timer value read
+ *                     D0Ar4, D1Ar3 is the current value of ___TBITime(B/I)
+ */
+       .text
+       .balign 4
+       .global ___TBITimerAdd
+       .type   ___TBITimerAdd,function
+___TBITimerAdd:
+       MOV     D1Ar5,#TIMER_ADD_BIT            /* Timer ADD request */
+       MOVT    D0FrT,#HI(___TBITimeCore)       /* Get timer core reg values */
+       CALL    D0FrT,#LO(___TBITimeCore)       /* with no register update */
+       ADD     D0Re0,D0Ar2,D0Ar6               /* Regenerate new value = result */
+       NEG     D0Ar2,D0Ar2                     /* Negate delta */
+       ASR     D1Re0,D0Ar2,#31                 /* Sign extend negated delta */
+       ADDS    D0Ar4,D0Ar4,D0Ar2               /* Add time added to ... */
+       ADD     D1Ar3,D1Ar3,D1Re0               /* ... real timer ... */
+       ADDCS   D1Ar3,D1Ar3,#1                  /* ... with carry */
+       SETL    [A0.3],D0Ar4,D1Ar3              /* Update ___TBITime(B/I) */
+       MOV     PC,D1RtP                        /* Return */
+       .size   ___TBITimerAdd,.-___TBITimerAdd
+
+#ifdef TBI_1_4
+/*
+ * Perform ___TBITimerDeadline logic
+ *    NB: Delays are positive, unlike the Wait values, which are negative
+ *
+ * Register Usage: D1Ar1 holds Id
+ *                 D0Ar2 holds Delay requested
+ *                 D0Re0 is used for the result - old TIMER Delay value
+ *                 D1Ar5, D0Ar6 are used as scratch
+ *                 Other registers are those set by ___TBITimeCore
+ *                 D0Ar6 is assumed to be the timer value read
+ *                 D0Ar4, D1Ar3 is the current value of ___TBITime(B/I)
+ *
+ */
+        .text
+        .type   ___TBITimerDeadline,function
+        .global ___TBITimerDeadline
+        .align  2
+___TBITimerDeadline:
+       MOV     D1Ar5,#TIMER_SET_BIT            /* Timer SET request */
+       MOVT    D0FrT,#HI(___TBITimeCore)       /* Get timer core reg values */
+       CALL    D0FrT,#LO(___TBITimeCore)       /* with no register update */
+       MOV     D0Re0,D0Ar6                     /* Old value read = result */
+       SUB     D0Ar2,D0Ar6,D0Ar2               /* Delta from (old - new) */
+       ASR     D1Re0,D0Ar2,#31                 /* Sign extend delta */
+       ADDS    D0Ar4,D0Ar4,D0Ar2               /* Add time added to ... */
+       ADD     D1Ar3,D1Ar3,D1Re0               /* ... real timer ... */
+       ADDCS   D1Ar3,D1Ar3,#1                  /* ... with carry */
+       SETL    [A0.3],D0Ar4,D1Ar3              /* Update ___TBITime(B/I) */
+       MOV     PC,D1RtP                        /* Return */
+        .size   ___TBITimerDeadline,.-___TBITimerDeadline
+#endif /* TBI_1_4 */
+
+/*
+ * Perform core timer access logic
+ *
+ * Register Usage: D1Ar1 holds Id, D0Ar2 holds input value for SET and
+ *                                             input value for ADD
+ *                 D1Ar5 controls op as SET or ADD as bit values
+ *                 On return D0Ar6, D1Ar5 holds the old 64-bit timer value
+ *                 A0.3 is setup to point at ___TBITime(I/B)
+ *                 A1.3 is setup to point at ___TBITimes
+ *                 D0Ar4, D1Ar3 is setup to value of ___TBITime(I/B)
+ */
+       .text
+       .balign 4
+       .global ___TBITimeCore
+       .type   ___TBITimeCore,function
+___TBITimeCore:
+#ifndef METAC_0_1
+       TSTT    D1Ar1,#HI(TBID_ISTAT_BIT)       /* Interrupt level timer? */
+#endif
+       MOVT    A1LbP,#HI(___TBITimes)
+       ADD     A1LbP,A1LbP,#LO(___TBITimes)
+       MOV     A1.3,A1LbP                      /* Get ___TBITimes address */
+#ifndef METAC_0_1
+       BNZ     $LTimeCoreI                     /* Yes: Service TXTIMERI! */
+#endif
+       LSRS    D1Ar5,D1Ar5,#1                  /* Carry = SET, Zero = !ADD */
+       GETD    A0.3,[A1.3+#0]                  /* A0.3 == &___TBITimeB */
+       MOV     D0Ar6,TXTIMER                   /* Always GET old value */
+       MOVCS   TXTIMER,D0Ar2                   /* Conditional SET operation */
+       ADDNZ   TXTIMER,D0Ar2,D0Ar6             /* Conditional ADD operation */
+#ifndef METAC_0_1
+       B       $LTimeCoreEnd
+$LTimeCoreI:
+       LSRS    D1Ar5,D1Ar5,#1                  /* Carry = SET, Zero = !ADD */
+       GETD    A0.3,[A1.3+#4]                  /* A0.3 == &___TBITimeI */
+       MOV     D0Ar6,TXTIMERI                  /* Always GET old value */
+       MOVCS   TXTIMERI,D0Ar2                  /* Conditional SET operation */
+       ADDNZ   TXTIMERI,D0Ar2,D0Ar6            /* Conditional ADD operation */
+$LTimeCoreEnd:
+#endif
+       ASR     D1Ar5,D0Ar6,#31                 /* Sign extend D0Ar6 into D1Ar5 */
+       GETL    D0Ar4,D1Ar3,[A0.3]              /* Read ___TBITime(B/I) */
+       MOV     PC,D0FrT                        /* Return quickly */
+       .size   ___TBITimeCore,.-___TBITimeCore
+
+/*
+ * End of tbitimer.S
+ */
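The 64-bit arithmetic performed by ___TBITimeStamp above can be summarised in C: the 'lost ticks' accumulator (___TBITimeB or ___TBITimeI) is added to the sign-extended current hardware timer value, with the carry propagated into the high word. The sketch below is only an illustration of that calculation, not code from this patch:

    /* Conceptual C equivalent of ___TBITimeStamp's additions */
    static long long tbi_timestamp(long long lost_ticks, int txtimer)
    {
            return lost_ticks + (long long)txtimer;     /* 64-bit, sign-extended add */
    }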
index e920cbe519fa0d2a019b31f1f60d5564247578b9..e507ab7df60b88d6bae4a3f69e28d8f6845924cd 100644 (file)
@@ -62,3 +62,8 @@ config CLKSRC_DBX500_PRCMU_SCHED_CLOCK
 
 config ARM_ARCH_TIMER
        bool
+
+config CLKSRC_METAG_GENERIC
+       def_bool y if METAG
+       help
+         This option enables support for the Meta per-thread timers.
index 7d671b85a98e6ff59c562dcbaa2941dc4794731e..4d8283aec5b51286ba3942ce4e3223a9281ab3cf 100644 (file)
@@ -21,3 +21,4 @@ obj-$(CONFIG_ARCH_TEGRA)      += tegra20_timer.o
 obj-$(CONFIG_VT8500_TIMER)     += vt8500_timer.o
 
 obj-$(CONFIG_ARM_ARCH_TIMER)           += arm_arch_timer.o
+obj-$(CONFIG_CLKSRC_METAG_GENERIC)     += metag_generic.o
diff --git a/drivers/clocksource/metag_generic.c b/drivers/clocksource/metag_generic.c
new file mode 100644 (file)
index 0000000..ade7513
--- /dev/null
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2005-2013 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Support for Meta per-thread timers.
+ *
+ * Meta hardware threads have 2 timers. The background timer (TXTIMER) is used
+ * as a free-running time base (hz clocksource), and the interrupt timer
+ * (TXTIMERI) is used for the timer interrupt (clock event). Both counters
+ * traditionally count at approximately 1MHz.
+ */
+
+#include <clocksource/metag_generic.h>
+#include <linux/cpu.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/time.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/clocksource.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+
+#include <asm/clock.h>
+#include <asm/hwthread.h>
+#include <asm/core_reg.h>
+#include <asm/metag_mem.h>
+#include <asm/tbx.h>
+
+#define HARDWARE_FREQ          1000000 /* 1MHz */
+#define HARDWARE_DIV           1       /* divide by 1 = 1MHz clock */
+#define HARDWARE_TO_NS_SHIFT   10      /* convert ticks to ns */
+
+static unsigned int hwtimer_freq = HARDWARE_FREQ;
+static DEFINE_PER_CPU(struct clock_event_device, local_clockevent);
+static DEFINE_PER_CPU(char [11], local_clockevent_name);
+
+static int metag_timer_set_next_event(unsigned long delta,
+                                     struct clock_event_device *dev)
+{
+       __core_reg_set(TXTIMERI, -delta);
+       return 0;
+}
+
+static void metag_timer_set_mode(enum clock_event_mode mode,
+                                struct clock_event_device *evt)
+{
+       switch (mode) {
+       case CLOCK_EVT_MODE_ONESHOT:
+       case CLOCK_EVT_MODE_RESUME:
+               break;
+
+       case CLOCK_EVT_MODE_SHUTDOWN:
+               /* We should disable the IRQ here */
+               break;
+
+       case CLOCK_EVT_MODE_PERIODIC:
+       case CLOCK_EVT_MODE_UNUSED:
+               WARN_ON(1);
+               break;
+       }
+}
+
+static cycle_t metag_clocksource_read(struct clocksource *cs)
+{
+       return __core_reg_get(TXTIMER);
+}
+
+static struct clocksource clocksource_metag = {
+       .name = "META",
+       .rating = 200,
+       .mask = CLOCKSOURCE_MASK(32),
+       .read = metag_clocksource_read,
+       .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static irqreturn_t metag_timer_interrupt(int irq, void *dummy)
+{
+       struct clock_event_device *evt = &__get_cpu_var(local_clockevent);
+
+       evt->event_handler(evt);
+
+       return IRQ_HANDLED;
+}
+
+static struct irqaction metag_timer_irq = {
+       .name = "META core timer",
+       .handler = metag_timer_interrupt,
+       .flags = IRQF_TIMER | IRQF_IRQPOLL | IRQF_PERCPU,
+};
+
+unsigned long long sched_clock(void)
+{
+       unsigned long long ticks = __core_reg_get(TXTIMER);
+       return ticks << HARDWARE_TO_NS_SHIFT;
+}
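sched_clock() above leans on the nominal 1MHz tick rate: each tick is roughly 1000ns, and shifting left by HARDWARE_TO_NS_SHIFT (10) multiplies by 1024 instead, which is within about 2.4% of the true value while keeping the conversion to a single shift. A small illustration (not part of the driver):

    /* ticks << 10 approximates ticks * 1000ns at the nominal 1MHz rate */
    static unsigned long long approx_ns(unsigned long long ticks)
    {
            return ticks << HARDWARE_TO_NS_SHIFT;       /* i.e. ticks * 1024 */
    }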
+
+static void __cpuinit arch_timer_setup(unsigned int cpu)
+{
+       unsigned int txdivtime;
+       struct clock_event_device *clk = &per_cpu(local_clockevent, cpu);
+       char *name = per_cpu(local_clockevent_name, cpu);
+
+       txdivtime = __core_reg_get(TXDIVTIME);
+
+       txdivtime &= ~TXDIVTIME_DIV_BITS;
+       txdivtime |= (HARDWARE_DIV & TXDIVTIME_DIV_BITS);
+
+       __core_reg_set(TXDIVTIME, txdivtime);
+
+       sprintf(name, "META %d", cpu);
+       clk->name = name;
+       clk->features = CLOCK_EVT_FEAT_ONESHOT;
+
+       clk->rating = 200;
+       clk->shift = 12;
+       clk->irq = tbisig_map(TBID_SIGNUM_TRT);
+       clk->set_mode = metag_timer_set_mode;
+       clk->set_next_event = metag_timer_set_next_event;
+
+       clk->mult = div_sc(hwtimer_freq, NSEC_PER_SEC, clk->shift);
+       clk->max_delta_ns = clockevent_delta2ns(0x7fffffff, clk);
+       clk->min_delta_ns = clockevent_delta2ns(0xf, clk);
+       clk->cpumask = cpumask_of(cpu);
+
+       clockevents_register_device(clk);
+
+       /*
+        * For all non-boot CPUs we need to synchronize our free
+        * running clock (TXTIMER) with the boot CPU's clock.
+        *
+        * While this won't be accurate, it should be close enough.
+        */
+       if (cpu) {
+               unsigned int thread0 = cpu_2_hwthread_id[0];
+               unsigned long val;
+
+               val = core_reg_read(TXUCT_ID, TXTIMER_REGNUM, thread0);
+               __core_reg_set(TXTIMER, val);
+       }
+}
+
+static int __cpuinit arch_timer_cpu_notify(struct notifier_block *self,
+                                          unsigned long action, void *hcpu)
+{
+       int cpu = (long)hcpu;
+
+       switch (action) {
+       case CPU_STARTING:
+       case CPU_STARTING_FROZEN:
+               arch_timer_setup(cpu);
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata arch_timer_cpu_nb = {
+       .notifier_call = arch_timer_cpu_notify,
+};
+
+int __init metag_generic_timer_init(void)
+{
+       /*
+        * On Meta 2 SoCs, the actual frequency of the timer is based on the
+        * Meta core clock speed divided by an integer, so it is only
+        * approximately 1MHz. Calculating the real frequency here drastically
+        * reduces clock skew on these SoCs.
+        */
+#ifdef CONFIG_METAG_META21
+       hwtimer_freq = get_coreclock() / (metag_in32(EXPAND_TIMER_DIV) + 1);
+#endif
+       clocksource_register_hz(&clocksource_metag, hwtimer_freq);
+
+       setup_irq(tbisig_map(TBID_SIGNUM_TRT), &metag_timer_irq);
+
+       /* Configure timer on boot CPU */
+       arch_timer_setup(smp_processor_id());
+
+       /* Hook cpu boot to configure other CPU's timers */
+       register_cpu_notifier(&arch_timer_cpu_nb);
+
+       return 0;
+}
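As a worked example of the Meta 2 frequency calculation above (the numbers are invented): a 360MHz core clock with EXPAND_TIMER_DIV reading 359 gives exactly 360000000 / (359 + 1) = 1000000Hz, whereas a divider value of 360 would give roughly 997230Hz, which is why the real frequency is computed rather than assumed to be 1MHz:

    /* Illustrative only: recover the real timer frequency from the divider */
    static unsigned int example_timer_freq(unsigned int coreclock_hz,
                                           unsigned int div_reg)
    {
            return coreclock_hz / (div_reg + 1);        /* 360000000 / 360 = 1000000 */
    }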
index e65fbf2cdf713814ef75cfca3831f4756ca8ec0e..98e3b87bdf1b48761df4320a9c588af26f38bb2c 100644 (file)
@@ -2,6 +2,8 @@ obj-$(CONFIG_IRQCHIP)                   += irqchip.o
 
 obj-$(CONFIG_ARCH_BCM2835)             += irq-bcm2835.o
 obj-$(CONFIG_ARCH_EXYNOS)              += exynos-combiner.o
+obj-$(CONFIG_METAG)                    += irq-metag-ext.o
+obj-$(CONFIG_METAG_PERFCOUNTER_IRQS)   += irq-metag.o
 obj-$(CONFIG_ARCH_SUNXI)               += irq-sunxi.o
 obj-$(CONFIG_ARCH_SPEAR3XX)            += spear-shirq.o
 obj-$(CONFIG_ARM_GIC)                  += irq-gic.o
diff --git a/drivers/irqchip/irq-metag-ext.c b/drivers/irqchip/irq-metag-ext.c
new file mode 100644 (file)
index 0000000..92c41ab
--- /dev/null
@@ -0,0 +1,868 @@
+/*
+ * Meta External interrupt code.
+ *
+ * Copyright (C) 2005-2012 Imagination Technologies Ltd.
+ *
+ * External interrupts on Meta are configured at two levels: in the CPU core and
+ * in the external trigger block. Interrupts from SoC peripherals are
+ * multiplexed onto a single Meta CPU "trigger" - traditionally it has always
+ * been trigger 2 (TR2). For details of how the de-multiplexing happens, see
+ * meta_intc_irq_demux().
+ */
+
+#include <linux/interrupt.h>
+#include <linux/irqchip/metag-ext.h>
+#include <linux/irqdomain.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+
+#include <asm/irq.h>
+#include <asm/hwthread.h>
+
+#define HWSTAT_STRIDE 8
+#define HWVEC_BLK_STRIDE 0x1000
+
+/**
+ * struct meta_intc_priv - private meta external interrupt data
+ * @nr_banks:          Number of interrupt banks
+ * @domain:            IRQ domain for all banks of external IRQs
+ * @unmasked:          Record of unmasked IRQs
+ * @levels_altered:    Record of altered level bits
+ */
+struct meta_intc_priv {
+       unsigned int            nr_banks;
+       struct irq_domain       *domain;
+
+       unsigned long           unmasked[4];
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+       unsigned long           levels_altered[4];
+#endif
+};
+
+/* Private data for the one and only external interrupt controller */
+static struct meta_intc_priv meta_intc_priv;
+
+/**
+ * meta_intc_offset() - Get the offset into the bank of a hardware IRQ number
+ * @hw:                Hardware IRQ number (within external trigger block)
+ *
+ * Returns:    Bit offset into the IRQ's bank registers
+ */
+static unsigned int meta_intc_offset(irq_hw_number_t hw)
+{
+       return hw & 0x1f;
+}
+
+/**
+ * meta_intc_bank() - Get the bank number of a hardware IRQ number
+ * @hw:                Hardware IRQ number (within external trigger block)
+ *
+ * Returns:    Bank number indicating which register the IRQ's bits are in
+ */
+static unsigned int meta_intc_bank(irq_hw_number_t hw)
+{
+       return hw >> 5;
+}
+
+/**
+ * meta_intc_stat_addr() - Get the address of a HWSTATEXT register
+ * @hw:                Hardware IRQ number (within external trigger block)
+ *
+ * Returns:    Address of a HWSTATEXT register containing the status bit for
+ *             the specified hardware IRQ number
+ */
+static void __iomem *meta_intc_stat_addr(irq_hw_number_t hw)
+{
+       return (void __iomem *)(HWSTATEXT +
+                               HWSTAT_STRIDE * meta_intc_bank(hw));
+}
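To make the addressing above concrete: external hardware IRQ 37, for example, decomposes into bank 1 (37 >> 5) and offset 5 (37 & 0x1f), so its status bit is bit 5 of the second HWSTATEXT register, HWSTAT_STRIDE (8) bytes above the first. A tiny illustrative check (not part of the driver):

    /* Illustration: hardware IRQ 37 maps to bank 1, bit 5 */
    static void __maybe_unused meta_intc_layout_example(void)
    {
            WARN_ON(meta_intc_bank(37) != 1);       /* 37 >> 5 */
            WARN_ON(meta_intc_offset(37) != 5);     /* 37 & 0x1f */
    }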
+
+/**
+ * meta_intc_level_addr() - Get the address of a HWLEVELEXT register
+ * @hw:                Hardware IRQ number (within external trigger block)
+ *
+ * Returns:    Address of a HWLEVELEXT register containing the sense bit for
+ *             the specified hardware IRQ number
+ */
+static void __iomem *meta_intc_level_addr(irq_hw_number_t hw)
+{
+       return (void __iomem *)(HWLEVELEXT +
+                               HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_mask_addr() - Get the address of a HWMASKEXT register
+ * @hw:                Hardware IRQ number (within external trigger block)
+ *
+ * Returns:    Address of a HWMASKEXT register containing the mask bit for the
+ *             specified hardware IRQ number
+ */
+static void __iomem *meta_intc_mask_addr(irq_hw_number_t hw)
+{
+       return (void __iomem *)(HWMASKEXT +
+                               HWSTAT_STRIDE * meta_intc_bank(hw));
+}
+
+/**
+ * meta_intc_vec_addr() - Get the vector address of a hardware interrupt
+ * @hw:                Hardware IRQ number (within external trigger block)
+ *
+ * Returns:    Address of a HWVECEXT register controlling the core trigger to
+ *             vector the IRQ onto
+ */
+static inline void __iomem *meta_intc_vec_addr(irq_hw_number_t hw)
+{
+       return (void __iomem *)(HWVEC0EXT +
+                               HWVEC_BLK_STRIDE * meta_intc_bank(hw) +
+                               HWVECnEXT_STRIDE * meta_intc_offset(hw));
+}
+
+/**
+ * meta_intc_startup_irq() - set up an external irq
+ * @data:      data for the external irq to start up
+ *
+ * Multiplex interrupts for irq onto TR2. Clear any pending interrupts and
+ * unmask irq, both using the appropriate callbacks.
+ */
+static unsigned int meta_intc_startup_irq(struct irq_data *data)
+{
+       irq_hw_number_t hw = data->hwirq;
+       void __iomem *vec_addr = meta_intc_vec_addr(hw);
+       int thread = hard_processor_id();
+
+       /* Perform any necessary acking. */
+       if (data->chip->irq_ack)
+               data->chip->irq_ack(data);
+
+       /* Wire up this interrupt to the core with HWVECxEXT. */
+       metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+       /* Perform any necessary unmasking. */
+       data->chip->irq_unmask(data);
+
+       return 0;
+}
+
+/**
+ * meta_intc_shutdown_irq() - turn off an external irq
+ * @data:      data for the external irq to turn off
+ *
+ * Mask irq using the appropriate callback and stop muxing it onto TR2.
+ */
+static void meta_intc_shutdown_irq(struct irq_data *data)
+{
+       irq_hw_number_t hw = data->hwirq;
+       void __iomem *vec_addr = meta_intc_vec_addr(hw);
+
+       /* Mask the IRQ */
+       data->chip->irq_mask(data);
+
+       /*
+        * Disable the IRQ at the core by removing the interrupt from
+        * the HW vector mapping.
+        */
+       metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_ack_irq() - acknowledge an external irq
+ * @data:      data for the external irq to ack
+ *
+ * Clear down an edge interrupt in the status register.
+ */
+static void meta_intc_ack_irq(struct irq_data *data)
+{
+       irq_hw_number_t hw = data->hwirq;
+       unsigned int bit = 1 << meta_intc_offset(hw);
+       void __iomem *stat_addr = meta_intc_stat_addr(hw);
+
+       /* Ack the int, if it is still 'on'.
+        * NOTE - this only works for edge triggered interrupts.
+        */
+       if (metag_in32(stat_addr) & bit)
+               metag_out32(bit, stat_addr);
+}
+
+/**
+ * record_irq_is_masked() - record the IRQ masked so it doesn't get handled
+ * @data:      data for the external irq to record
+ *
+ * This should get called whenever an external IRQ is masked (by whichever
+ * callback is used). It records the IRQ masked so that it doesn't get handled
+ * if it still shows up in the status register.
+ */
+static void record_irq_is_masked(struct irq_data *data)
+{
+       struct meta_intc_priv *priv = &meta_intc_priv;
+       irq_hw_number_t hw = data->hwirq;
+
+       clear_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]);
+}
+
+/**
+ * record_irq_is_unmasked() - record the IRQ unmasked so it can be handled
+ * @data:      data for the external irq to record
+ *
+ * This should get called whenever an external IRQ is unmasked (by whichever
+ * callback is used). It records the IRQ unmasked so that it gets handled if it
+ * shows up in the status register.
+ */
+static void record_irq_is_unmasked(struct irq_data *data)
+{
+       struct meta_intc_priv *priv = &meta_intc_priv;
+       irq_hw_number_t hw = data->hwirq;
+
+       set_bit(meta_intc_offset(hw), &priv->unmasked[meta_intc_bank(hw)]);
+}
+
+/*
+ * For use by wrapper IRQ drivers
+ */
+
+/**
+ * meta_intc_mask_irq_simple() - minimal mask used by wrapper IRQ drivers
+ * @data:      data for the external irq being masked
+ *
+ * This should be called by any wrapper IRQ driver mask functions. It doesn't do
+ * any masking but records the IRQ as masked so that the core code knows the
+ * mask has taken place. It is the caller's responsibility to ensure that the IRQ
+ * won't trigger an interrupt to the core.
+ */
+void meta_intc_mask_irq_simple(struct irq_data *data)
+{
+       record_irq_is_masked(data);
+}
+
+/**
+ * meta_intc_unmask_irq_simple() - minimal unmask used by wrapper IRQ drivers
+ * @data:      data for the external irq being unmasked
+ *
+ * This should be called by any wrapper IRQ driver unmask functions. It doesn't
+ * do any unmasking but records the IRQ as unmasked so that the core code knows
+ * the unmask has taken place. It is the caller's responsibility to ensure that
+ * the IRQ can now trigger an interrupt to the core.
+ */
+void meta_intc_unmask_irq_simple(struct irq_data *data)
+{
+       record_irq_is_unmasked(data);
+}
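A hypothetical wrapper driver might pair these helpers with its own SoC-level mask register, along the lines of the sketch below; the SoC register write is invented and only the two *_simple() calls come from this file (assumed to be declared in linux/irqchip/metag-ext.h):

    #include <linux/irq.h>
    #include <linux/irqchip/metag-ext.h>

    /* Placeholder for a SoC-specific mask register write (invented) */
    static void soc_write_irq_enable(irq_hw_number_t hw, bool enable)
    {
    }

    static void soc_mask_irq(struct irq_data *data)
    {
            soc_write_irq_enable(data->hwirq, false);
            meta_intc_mask_irq_simple(data);    /* keep core bookkeeping in sync */
    }

    static void soc_unmask_irq(struct irq_data *data)
    {
            soc_write_irq_enable(data->hwirq, true);
            meta_intc_unmask_irq_simple(data);
    }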
+
+
+/**
+ * meta_intc_mask_irq() - mask an external irq using HWMASKEXT
+ * @data:      data for the external irq to mask
+ *
+ * This is a default implementation of a mask function which makes use of the
+ * HWMASKEXT registers available in newer versions.
+ *
+ * Earlier versions without these registers should use SoC level IRQ masking
+ * which call the meta_intc_*_simple() functions above, or if that isn't
+ * available should use the fallback meta_intc_*_nomask() functions below.
+ */
+static void meta_intc_mask_irq(struct irq_data *data)
+{
+       irq_hw_number_t hw = data->hwirq;
+       unsigned int bit = 1 << meta_intc_offset(hw);
+       void __iomem *mask_addr = meta_intc_mask_addr(hw);
+       unsigned long flags;
+
+       record_irq_is_masked(data);
+
+       /* update the interrupt mask */
+       __global_lock2(flags);
+       metag_out32(metag_in32(mask_addr) & ~bit, mask_addr);
+       __global_unlock2(flags);
+}
+
+/**
+ * meta_intc_unmask_irq() - unmask an external irq using HWMASKEXT
+ * @data:      data for the external irq to unmask
+ *
+ * This is a default implementation of an unmask function which makes use of the
+ * HWMASKEXT registers available in newer versions. It should be paired with
+ * meta_intc_mask_irq() above.
+ */
+static void meta_intc_unmask_irq(struct irq_data *data)
+{
+       irq_hw_number_t hw = data->hwirq;
+       unsigned int bit = 1 << meta_intc_offset(hw);
+       void __iomem *mask_addr = meta_intc_mask_addr(hw);
+       unsigned long flags;
+
+       record_irq_is_unmasked(data);
+
+       /* update the interrupt mask */
+       __global_lock2(flags);
+       metag_out32(metag_in32(mask_addr) | bit, mask_addr);
+       __global_unlock2(flags);
+}
+
+/**
+ * meta_intc_mask_irq_nomask() - mask an external irq by unvectoring
+ * @data:      data for the external irq to mask
+ *
+ * This is the version of the mask function for older versions which don't have
+ * HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the IRQ is
+ * unvectored from the core and retriggered if necessary later.
+ */
+static void meta_intc_mask_irq_nomask(struct irq_data *data)
+{
+       irq_hw_number_t hw = data->hwirq;
+       void __iomem *vec_addr = meta_intc_vec_addr(hw);
+
+       record_irq_is_masked(data);
+
+       /* there is no interrupt mask, so unvector the interrupt */
+       metag_out32(0, vec_addr);
+}
+
+/**
+ * meta_intc_unmask_edge_irq_nomask() - unmask an edge irq by revectoring
+ * @data:      data for the external irq to unmask
+ *
+ * This is the version of the unmask function for older versions which don't
+ * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the
+ * IRQ is revectored back to the core and retriggered if necessary.
+ *
+ * The retriggering done by this function is specific to edge interrupts.
+ */
+static void meta_intc_unmask_edge_irq_nomask(struct irq_data *data)
+{
+       irq_hw_number_t hw = data->hwirq;
+       unsigned int bit = 1 << meta_intc_offset(hw);
+       void __iomem *stat_addr = meta_intc_stat_addr(hw);
+       void __iomem *vec_addr = meta_intc_vec_addr(hw);
+       unsigned int thread = hard_processor_id();
+
+       record_irq_is_unmasked(data);
+
+       /* there is no interrupt mask, so revector the interrupt */
+       metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+       /*
+        * Re-trigger interrupt
+        *
+        * Writing a 1 toggles, and a 0->1 transition triggers. We only
+        * retrigger if the status bit is already set, which means we
+        * need to clear it first. Retriggering is fundamentally racy
+        * because if the interrupt fires again after we clear it we
+        * could end up clearing it again and the interrupt handler
+        * thinking it hasn't fired. Therefore we need to keep trying to
+        * retrigger until the bit is set.
+        */
+       if (metag_in32(stat_addr) & bit) {
+               metag_out32(bit, stat_addr);
+               while (!(metag_in32(stat_addr) & bit))
+                       metag_out32(bit, stat_addr);
+       }
+}
+
+/**
+ * meta_intc_unmask_level_irq_nomask() - unmask a level irq by revectoring
+ * @data:      data for the external irq to unmask
+ *
+ * This is the version of the unmask function for older versions which don't
+ * have HWMASKEXT registers, or a SoC level means of masking IRQs. Instead the
+ * IRQ is revectored back to the core and retriggered if necessary.
+ *
+ * The retriggering done by this function is specific to level interrupts.
+ */
+static void meta_intc_unmask_level_irq_nomask(struct irq_data *data)
+{
+       irq_hw_number_t hw = data->hwirq;
+       unsigned int bit = 1 << meta_intc_offset(hw);
+       void __iomem *stat_addr = meta_intc_stat_addr(hw);
+       void __iomem *vec_addr = meta_intc_vec_addr(hw);
+       unsigned int thread = hard_processor_id();
+
+       record_irq_is_unmasked(data);
+
+       /* there is no interrupt mask, so revector the interrupt */
+       metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+       /* Re-trigger interrupt */
+       /* Writing a 1 triggers interrupt */
+       if (metag_in32(stat_addr) & bit)
+               metag_out32(bit, stat_addr);
+}
+
+/**
+ * meta_intc_irq_set_type() - set the type of an external irq
+ * @data:      data for the external irq to set the type of
+ * @flow_type: new irq flow type
+ *
+ * Set the flow type of an external interrupt. This updates the irq chip and irq
+ * handler depending on whether the irq is edge or level sensitive (the polarity
+ * is ignored), and also sets up the bit in HWLEVELEXT so the hardware knows
+ * when to trigger.
+ */
+static int meta_intc_irq_set_type(struct irq_data *data, unsigned int flow_type)
+{
+#ifdef CONFIG_METAG_SUSPEND_MEM
+       struct meta_intc_priv *priv = &meta_intc_priv;
+#endif
+       unsigned int irq = data->irq;
+       irq_hw_number_t hw = data->hwirq;
+       unsigned int bit = 1 << meta_intc_offset(hw);
+       void __iomem *level_addr = meta_intc_level_addr(hw);
+       unsigned long flags;
+       unsigned int level;
+
+       /* update the chip/handler */
+       if (flow_type & IRQ_TYPE_LEVEL_MASK)
+               __irq_set_chip_handler_name_locked(irq, &meta_intc_level_chip,
+                                                  handle_level_irq, NULL);
+       else
+               __irq_set_chip_handler_name_locked(irq, &meta_intc_edge_chip,
+                                                  handle_edge_irq, NULL);
+
+       /* and clear/set the bit in HWLEVELEXT */
+       __global_lock2(flags);
+       level = metag_in32(level_addr);
+       if (flow_type & IRQ_TYPE_LEVEL_MASK)
+               level |= bit;
+       else
+               level &= ~bit;
+       metag_out32(level, level_addr);
+#ifdef CONFIG_METAG_SUSPEND_MEM
+       priv->levels_altered[meta_intc_bank(hw)] |= bit;
+#endif
+       __global_unlock2(flags);
+
+       return 0;
+}
+
+/**
+ * meta_intc_irq_demux() - external irq de-multiplexer
+ * @irq:       the virtual interrupt number
+ * @desc:      the interrupt description structure for this irq
+ *
+ * The cpu receives an interrupt on TR2 when a SoC interrupt has occurred. It is
+ * this function's job to demux this irq and figure out exactly which external
+ * irq needs servicing.
+ *
+ * Whilst using TR2 to detect external interrupts is a software convention, it is
+ * (hopefully) unlikely to change.
+ */
+static void meta_intc_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+       struct meta_intc_priv *priv = &meta_intc_priv;
+       irq_hw_number_t hw;
+       unsigned int bank, irq_no, status;
+       void __iomem *stat_addr = meta_intc_stat_addr(0);
+
+       /*
+        * Locate which interrupt has caused our handler to run.
+        */
+       for (bank = 0; bank < priv->nr_banks; ++bank) {
+               /* Which interrupts are currently pending in this bank? */
+recalculate:
+               status = metag_in32(stat_addr) & priv->unmasked[bank];
+
+               for (hw = bank*32; status; status >>= 1, ++hw) {
+                       if (status & 0x1) {
+                               /*
+                                * Map the hardware IRQ number to a virtual
+                                * Linux IRQ number.
+                                */
+                               irq_no = irq_linear_revmap(priv->domain, hw);
+
+                               /*
+                                * Only fire off external interrupts that are
+                                * registered to be handled by the kernel.
+                                * Other external interrupts are probably being
+                                * handled by other Meta hardware threads.
+                                */
+                               generic_handle_irq(irq_no);
+
+                               /*
+                                * The handler may have re-enabled interrupts
+                                * which could have caused a nested invocation
+                                * of this code and make the copy of the
+                                * status register we are using invalid.
+                                */
+                               goto recalculate;
+                       }
+               }
+               stat_addr += HWSTAT_STRIDE;
+       }
+}
+
+#ifdef CONFIG_SMP
+/**
+ * meta_intc_set_affinity() - set the affinity for an interrupt
+ * @data:      data for the external irq to set the affinity of
+ * @cpumask:   cpu mask representing cpus which can handle the interrupt
+ * @force:     whether to force (ignored)
+ *
+ * Revector the specified external irq onto a specific cpu's TR2 trigger, so
+ * that that cpu tends to be the one that handles it.
+ */
+static int meta_intc_set_affinity(struct irq_data *data,
+                                 const struct cpumask *cpumask, bool force)
+{
+       irq_hw_number_t hw = data->hwirq;
+       void __iomem *vec_addr = meta_intc_vec_addr(hw);
+       unsigned int cpu, thread;
+
+       /*
+        * Wire up this interrupt from HWVECxEXT to the Meta core.
+        *
+        * Note that we can't wire up HWVECxEXT to interrupt more than
+        * one cpu (the interrupt code doesn't support it), so we just
+        * pick the first cpu we find in 'cpumask'.
+        */
+       cpu = cpumask_any(cpumask);
+       thread = cpu_2_hwthread_id[cpu];
+
+       metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR2(thread)), vec_addr);
+
+       return 0;
+}
+#else
+#define meta_intc_set_affinity NULL
+#endif
+
+#ifdef CONFIG_PM_SLEEP
+#define META_INTC_CHIP_FLAGS   (IRQCHIP_MASK_ON_SUSPEND \
+                               | IRQCHIP_SKIP_SET_WAKE)
+#else
+#define META_INTC_CHIP_FLAGS   0
+#endif
+
+/* public edge/level irq chips which SoCs can override */
+
+struct irq_chip meta_intc_edge_chip = {
+       .irq_startup            = meta_intc_startup_irq,
+       .irq_shutdown           = meta_intc_shutdown_irq,
+       .irq_ack                = meta_intc_ack_irq,
+       .irq_mask               = meta_intc_mask_irq,
+       .irq_unmask             = meta_intc_unmask_irq,
+       .irq_set_type           = meta_intc_irq_set_type,
+       .irq_set_affinity       = meta_intc_set_affinity,
+       .flags                  = META_INTC_CHIP_FLAGS,
+};
+
+struct irq_chip meta_intc_level_chip = {
+       .irq_startup            = meta_intc_startup_irq,
+       .irq_shutdown           = meta_intc_shutdown_irq,
+       .irq_set_type           = meta_intc_irq_set_type,
+       .irq_mask               = meta_intc_mask_irq,
+       .irq_unmask             = meta_intc_unmask_irq,
+       .irq_set_affinity       = meta_intc_set_affinity,
+       .flags                  = META_INTC_CHIP_FLAGS,
+};
+
+/**
+ * meta_intc_map() - map an external irq
+ * @d:         irq domain of external trigger block
+ * @irq:       virtual irq number
+ * @hw:                hardware irq number within external trigger block
+ *
+ * This sets up a virtual irq for a specified hardware interrupt. The irq chip
+ * and handler are configured, using the HWLEVELEXT registers to determine
+ * edge/level flow type. These registers will have been set when the irq type is
+ * set (or set to a default at init time).
+ */
+static int meta_intc_map(struct irq_domain *d, unsigned int irq,
+                        irq_hw_number_t hw)
+{
+       unsigned int bit = 1 << meta_intc_offset(hw);
+       void __iomem *level_addr = meta_intc_level_addr(hw);
+
+       /* Go by the current sense in the HWLEVELEXT register */
+       if (metag_in32(level_addr) & bit)
+               irq_set_chip_and_handler(irq, &meta_intc_level_chip,
+                                        handle_level_irq);
+       else
+               irq_set_chip_and_handler(irq, &meta_intc_edge_chip,
+                                        handle_edge_irq);
+       return 0;
+}
+
+static const struct irq_domain_ops meta_intc_domain_ops = {
+       .map = meta_intc_map,
+       .xlate = irq_domain_xlate_twocell,
+};
+
+#ifdef CONFIG_METAG_SUSPEND_MEM
+
+/**
+ * struct meta_intc_context - suspend context
+ * @levels:    State of HWLEVELEXT registers
+ * @masks:     State of HWMASKEXT registers
+ * @vectors:   State of HWVECEXT registers
+ * @txvecint:  State of TxVECINT registers
+ *
+ * This structure stores the IRQ state across suspend.
+ */
+struct meta_intc_context {
+       u32 levels[4];
+       u32 masks[4];
+       u8 vectors[4*32];
+
+       u8 txvecint[4][4];
+};
+
+/* suspend context */
+static struct meta_intc_context *meta_intc_context;
+
+/**
+ * meta_intc_suspend() - store irq state
+ *
+ * To avoid interfering with other threads we only save the IRQ state of IRQs in
+ * use by Linux.
+ */
+static int meta_intc_suspend(void)
+{
+       struct meta_intc_priv *priv = &meta_intc_priv;
+       int i, j;
+       irq_hw_number_t hw;
+       unsigned int bank;
+       unsigned long flags;
+       struct meta_intc_context *context;
+       void __iomem *level_addr, *mask_addr, *vec_addr;
+       u32 mask, bit;
+
+       context = kzalloc(sizeof(*context), GFP_ATOMIC);
+       if (!context)
+               return -ENOMEM;
+
+       hw = 0;
+       level_addr = meta_intc_level_addr(0);
+       mask_addr = meta_intc_mask_addr(0);
+       for (bank = 0; bank < priv->nr_banks; ++bank) {
+               vec_addr = meta_intc_vec_addr(hw);
+
+               /* create mask of interrupts in use */
+               mask = 0;
+               for (bit = 1; bit; bit <<= 1) {
+                       i = irq_linear_revmap(priv->domain, hw);
+                       /* save mapped irqs which are enabled or have actions */
+                       if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) ||
+                                 irq_has_action(i))) {
+                               mask |= bit;
+
+                               /* save trigger vector */
+                               context->vectors[hw] = metag_in32(vec_addr);
+                       }
+
+                       ++hw;
+                       vec_addr += HWVECnEXT_STRIDE;
+               }
+
+               /* save level state if any IRQ levels altered */
+               if (priv->levels_altered[bank])
+                       context->levels[bank] = metag_in32(level_addr);
+               /* save mask state if any IRQs in use */
+               if (mask)
+                       context->masks[bank] = metag_in32(mask_addr);
+
+               level_addr += HWSTAT_STRIDE;
+               mask_addr += HWSTAT_STRIDE;
+       }
+
+       /* save trigger matrixing */
+       __global_lock2(flags);
+       for (i = 0; i < 4; ++i)
+               for (j = 0; j < 4; ++j)
+                       context->txvecint[i][j] = metag_in32(T0VECINT_BHALT +
+                                                            TnVECINT_STRIDE*i +
+                                                            8*j);
+       __global_unlock2(flags);
+
+       meta_intc_context = context;
+       return 0;
+}
+
+/**
+ * meta_intc_resume() - restore saved irq state
+ *
+ * Restore the saved IRQ state and drop it.
+ */
+static void meta_intc_resume(void)
+{
+       struct meta_intc_priv *priv = &meta_intc_priv;
+       int i, j;
+       irq_hw_number_t hw;
+       unsigned int bank;
+       unsigned long flags;
+       struct meta_intc_context *context = meta_intc_context;
+       void __iomem *level_addr, *mask_addr, *vec_addr;
+       u32 mask, bit, tmp;
+
+       meta_intc_context = NULL;
+
+       hw = 0;
+       level_addr = meta_intc_level_addr(0);
+       mask_addr = meta_intc_mask_addr(0);
+       for (bank = 0; bank < priv->nr_banks; ++bank) {
+               vec_addr = meta_intc_vec_addr(hw);
+
+               /* create mask of interrupts in use */
+               mask = 0;
+               for (bit = 1; bit; bit <<= 1) {
+                       i = irq_linear_revmap(priv->domain, hw);
+                       /* restore mapped irqs, enabled or with actions */
+                       if (i && (!irqd_irq_disabled(irq_get_irq_data(i)) ||
+                                 irq_has_action(i))) {
+                               mask |= bit;
+
+                               /* restore trigger vector */
+                               metag_out32(context->vectors[hw], vec_addr);
+                       }
+
+                       ++hw;
+                       vec_addr += HWVECnEXT_STRIDE;
+               }
+
+               if (mask) {
+                       /* restore mask state */
+                       __global_lock2(flags);
+                       tmp = metag_in32(mask_addr);
+                       tmp = (tmp & ~mask) | (context->masks[bank] & mask);
+                       metag_out32(tmp, mask_addr);
+                       __global_unlock2(flags);
+               }
+
+               mask = priv->levels_altered[bank];
+               if (mask) {
+                       /* restore level state */
+                       __global_lock2(flags);
+                       tmp = metag_in32(level_addr);
+                       tmp = (tmp & ~mask) | (context->levels[bank] & mask);
+                       metag_out32(tmp, level_addr);
+                       __global_unlock2(flags);
+               }
+
+               level_addr += HWSTAT_STRIDE;
+               mask_addr += HWSTAT_STRIDE;
+       }
+
+       /* restore trigger matrixing */
+       __global_lock2(flags);
+       for (i = 0; i < 4; ++i) {
+               for (j = 0; j < 4; ++j) {
+                       metag_out32(context->txvecint[i][j],
+                                   T0VECINT_BHALT +
+                                   TnVECINT_STRIDE*i +
+                                   8*j);
+               }
+       }
+       __global_unlock2(flags);
+
+       kfree(context);
+}
+
+static struct syscore_ops meta_intc_syscore_ops = {
+       .suspend = meta_intc_suspend,
+       .resume = meta_intc_resume,
+};
+
+static void __init meta_intc_init_syscore_ops(struct meta_intc_priv *priv)
+{
+       register_syscore_ops(&meta_intc_syscore_ops);
+}
+#else
+#define meta_intc_init_syscore_ops(priv) do {} while (0)
+#endif
+
+/**
+ * meta_intc_init_cpu() - register with a Meta cpu
+ * @priv:      private interrupt controller data
+ * @cpu:       the CPU to register on
+ *
+ * Configure @cpu's TR2 irq so that we can demux external irqs.
+ */
+static void __init meta_intc_init_cpu(struct meta_intc_priv *priv, int cpu)
+{
+       unsigned int thread = cpu_2_hwthread_id[cpu];
+       unsigned int signum = TBID_SIGNUM_TR2(thread);
+       int irq = tbisig_map(signum);
+
+       /* Register the multiplexed IRQ handler */
+       irq_set_chained_handler(irq, meta_intc_irq_demux);
+       irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+}
+
+/**
+ * meta_intc_no_mask() - indicate lack of HWMASKEXT registers
+ *
+ * Called from SoC code (or init code below) to dynamically indicate the lack of
+ * HWMASKEXT registers (for example depending on some SoC revision register).
+ * This alters the irq mask and unmask callbacks to use the fallback
+ * unvectoring/retriggering technique instead of using HWMASKEXT registers.
+ */
+void __init meta_intc_no_mask(void)
+{
+       meta_intc_edge_chip.irq_mask    = meta_intc_mask_irq_nomask;
+       meta_intc_edge_chip.irq_unmask  = meta_intc_unmask_edge_irq_nomask;
+       meta_intc_level_chip.irq_mask   = meta_intc_mask_irq_nomask;
+       meta_intc_level_chip.irq_unmask = meta_intc_unmask_level_irq_nomask;
+}
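SoC setup code might decide this at runtime, for instance from a revision register; the helper below is purely illustrative and the revision check is made up:

    /* Invented revision helper, standing in for a real SoC register read */
    static int __init example_soc_revision(void)
    {
            return 1;
    }

    static void __init example_soc_irq_fixup(void)
    {
            if (example_soc_revision() < 2)     /* no HWMASKEXT on old silicon */
                    meta_intc_no_mask();
    }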
+
+/**
+ * init_external_IRQ() - initialise the external irq controller
+ *
+ * Set up the external irq controller using device tree properties. This is
+ * called from init_IRQ().
+ */
+int __init init_external_IRQ(void)
+{
+       struct meta_intc_priv *priv = &meta_intc_priv;
+       struct device_node *node;
+       int ret, cpu;
+       u32 val;
+       bool no_masks = false;
+
+       node = of_find_compatible_node(NULL, NULL, "img,meta-intc");
+       if (!node)
+               return -ENOENT;
+
+       /* Get number of banks */
+       ret = of_property_read_u32(node, "num-banks", &val);
+       if (ret) {
+               pr_err("meta-intc: No num-banks property found\n");
+               return ret;
+       }
+       if (val < 1 || val > 4) {
+               pr_err("meta-intc: num-banks (%u) out of range\n", val);
+               return -EINVAL;
+       }
+       priv->nr_banks = val;
+
+       /* Are any mask registers present? */
+       if (of_get_property(node, "no-mask", NULL))
+               no_masks = true;
+
+       /* No HWMASKEXT registers present? */
+       if (no_masks)
+               meta_intc_no_mask();
+
+       /* Set up an IRQ domain */
+       /*
+        * This is a legacy IRQ domain for now until all the platform setup code
+        * has been converted to devicetree.
+        */
+       priv->domain = irq_domain_add_linear(node, priv->nr_banks*32,
+                                            &meta_intc_domain_ops, priv);
+       if (unlikely(!priv->domain)) {
+               pr_err("meta-intc: cannot add IRQ domain\n");
+               return -ENOMEM;
+       }
+
+       /* Setup TR2 for all cpus. */
+       for_each_possible_cpu(cpu)
+               meta_intc_init_cpu(priv, cpu);
+
+       /* Set up system suspend/resume callbacks */
+       meta_intc_init_syscore_ops(priv);
+
+       pr_info("meta-intc: External IRQ controller initialised (%u IRQs)\n",
+               priv->nr_banks*32);
+
+       return 0;
+}
diff --git a/drivers/irqchip/irq-metag.c b/drivers/irqchip/irq-metag.c
new file mode 100644 (file)
index 0000000..8e94d7a
--- /dev/null
@@ -0,0 +1,343 @@
+/*
+ * Meta internal (HWSTATMETA) interrupt code.
+ *
+ * Copyright (C) 2011-2012 Imagination Technologies Ltd.
+ *
+ * This code is based on the code in SoC/common/irq.c and SoC/comet/irq.c
+ * The code base could be generalised/merged as a lot of the functionality is
+ * similar. Until this is done, we try to keep the code simple here.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irqdomain.h>
+
+#include <asm/irq.h>
+#include <asm/hwthread.h>
+
+#define PERF0VECINT            0x04820580
+#define PERF1VECINT            0x04820588
+#define PERF0TRIG_OFFSET       16
+#define PERF1TRIG_OFFSET       17
+
+/**
+ * struct metag_internal_irq_priv - private meta internal interrupt data
+ * @domain:            IRQ domain for all internal Meta IRQs (HWSTATMETA)
+ * @unmasked:          Record of unmasked IRQs
+ */
+struct metag_internal_irq_priv {
+       struct irq_domain       *domain;
+
+       unsigned long           unmasked;
+};
+
+/* Private data for the one and only internal interrupt controller */
+static struct metag_internal_irq_priv metag_internal_irq_priv;
+
+static unsigned int metag_internal_irq_startup(struct irq_data *data);
+static void metag_internal_irq_shutdown(struct irq_data *data);
+static void metag_internal_irq_ack(struct irq_data *data);
+static void metag_internal_irq_mask(struct irq_data *data);
+static void metag_internal_irq_unmask(struct irq_data *data);
+#ifdef CONFIG_SMP
+static int metag_internal_irq_set_affinity(struct irq_data *data,
+                       const struct cpumask *cpumask, bool force);
+#endif
+
+static struct irq_chip internal_irq_edge_chip = {
+       .name = "HWSTATMETA-IRQ",
+       .irq_startup = metag_internal_irq_startup,
+       .irq_shutdown = metag_internal_irq_shutdown,
+       .irq_ack = metag_internal_irq_ack,
+       .irq_mask = metag_internal_irq_mask,
+       .irq_unmask = metag_internal_irq_unmask,
+#ifdef CONFIG_SMP
+       .irq_set_affinity = metag_internal_irq_set_affinity,
+#endif
+};
+
+/*
+ *     metag_hwvec_addr - get the address of the *VECINT register for an irq
+ *
+ *     This function acts as a table of the triggers supported on HWSTATMETA.
+ *     It could use a structure, but it is better kept simple as changes in
+ *     this code should be rare.
+ */
+static inline void __iomem *metag_hwvec_addr(irq_hw_number_t hw)
+{
+       void __iomem *addr;
+
+       switch (hw) {
+       case PERF0TRIG_OFFSET:
+               addr = (void __iomem *)PERF0VECINT;
+               break;
+       case PERF1TRIG_OFFSET:
+               addr = (void __iomem *)PERF1VECINT;
+               break;
+       default:
+               addr = NULL;
+               break;
+       }
+       return addr;
+}
+
+/*
+ *     metag_internal_irq_startup - set up an internal irq
+ *     @data:  irq data of the irq to start up
+ *
+ *     Multiplex interrupts for this irq onto TR1. Clear any pending
+ *     interrupts.
+ */
+static unsigned int metag_internal_irq_startup(struct irq_data *data)
+{
+       /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */
+       metag_internal_irq_ack(data);
+
+       /* Enable the interrupt by unmasking it */
+       metag_internal_irq_unmask(data);
+
+       return 0;
+}
+
+/*
+ *     metag_internal_irq_shutdown - turn off the irq
+ *     @data:  irq data of the irq to turn off
+ *
+ *     Mask the irq and clear any pending interrupts.
+ *     Stop muxing the irq onto TR1.
+ */
+static void metag_internal_irq_shutdown(struct irq_data *data)
+{
+       /* Disable the IRQ at the core by masking it. */
+       metag_internal_irq_mask(data);
+
+       /* Clear (toggle) the bit in HWSTATMETA for our interrupt. */
+       metag_internal_irq_ack(data);
+}
+
+/*
+ *     metag_internal_irq_ack - acknowledge irq
+ *     @data:  irq data of the irq to ack
+ */
+static void metag_internal_irq_ack(struct irq_data *data)
+{
+       irq_hw_number_t hw = data->hwirq;
+       unsigned int bit = 1 << hw;
+
+       if (metag_in32(HWSTATMETA) & bit)
+               metag_out32(bit, HWSTATMETA);
+}
+
+/**
+ * metag_internal_irq_mask() - mask an internal irq by unvectoring
+ * @data:      data for the internal irq to mask
+ *
+ * HWSTATMETA has no mask register. Instead the IRQ is unvectored from the core
+ * and retriggered if necessary later.
+ */
+static void metag_internal_irq_mask(struct irq_data *data)
+{
+       struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+       irq_hw_number_t hw = data->hwirq;
+       void __iomem *vec_addr = metag_hwvec_addr(hw);
+
+       clear_bit(hw, &priv->unmasked);
+
+       /* there is no interrupt mask, so unvector the interrupt */
+       metag_out32(0, vec_addr);
+}
+
+/**
+ * metag_internal_irq_unmask() - unmask an internal irq by revectoring
+ * @data:      data for the internal irq to unmask
+ *
+ * HWSTATMETA has no mask register. Instead the IRQ is revectored back to the
+ * core and retriggered if necessary.
+ */
+static void metag_internal_irq_unmask(struct irq_data *data)
+{
+       struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+       irq_hw_number_t hw = data->hwirq;
+       unsigned int bit = 1 << hw;
+       void __iomem *vec_addr = metag_hwvec_addr(hw);
+       unsigned int thread = hard_processor_id();
+
+       set_bit(hw, &priv->unmasked);
+
+       /* there is no interrupt mask, so revector the interrupt */
+       metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)), vec_addr);
+
+       /*
+        * Re-trigger interrupt
+        *
+        * Writing a 1 toggles, and a 0->1 transition triggers. We only
+        * retrigger if the status bit is already set, which means we
+        * need to clear it first. Retriggering is fundamentally racy
+        * because if the interrupt fires again after we clear it we
+        * could end up clearing it again and the interrupt handler
+        * thinking it hasn't fired. Therefore we need to keep trying to
+        * retrigger until the bit is set.
+        */
+       if (metag_in32(HWSTATMETA) & bit) {
+               metag_out32(bit, HWSTATMETA);
+               while (!(metag_in32(HWSTATMETA) & bit))
+                       metag_out32(bit, HWSTATMETA);
+       }
+}
+
+#ifdef CONFIG_SMP
+/*
+ *     metag_internal_irq_set_affinity - set the affinity for an interrupt
+ */
+static int metag_internal_irq_set_affinity(struct irq_data *data,
+                       const struct cpumask *cpumask, bool force)
+{
+       unsigned int cpu, thread;
+       irq_hw_number_t hw = data->hwirq;
+       /*
+        * Wire up this interrupt from *VECINT to the Meta core.
+        *
+        * Note that we can't wire up *VECINT to interrupt more than
+        * one cpu (the interrupt code doesn't support it), so we just
+        * pick the first cpu we find in 'cpumask'.
+        */
+       cpu = cpumask_any(cpumask);
+       thread = cpu_2_hwthread_id[cpu];
+
+       metag_out32(TBI_TRIG_VEC(TBID_SIGNUM_TR1(thread)),
+                   metag_hwvec_addr(hw));
+
+       return 0;
+}
+#endif
+
+/*
+ *     metag_internal_irq_demux - irq de-multiplexer
+ *     @irq:   the interrupt number
+ *     @desc:  the interrupt description structure for this irq
+ *
+ *     The cpu receives an interrupt on TR1 when an interrupt has
+ *     occurred. It is this function's job to demux this irq and
+ *     figure out exactly which trigger needs servicing.
+ */
+static void metag_internal_irq_demux(unsigned int irq, struct irq_desc *desc)
+{
+       struct metag_internal_irq_priv *priv = irq_desc_get_handler_data(desc);
+       irq_hw_number_t hw;
+       unsigned int irq_no;
+       u32 status;
+
+recalculate:
+       status = metag_in32(HWSTATMETA) & priv->unmasked;
+
+       for (hw = 0; status != 0; status >>= 1, ++hw) {
+               if (status & 0x1) {
+                       /*
+                        * Map the hardware IRQ number to a virtual Linux IRQ
+                        * number.
+                        */
+                       irq_no = irq_linear_revmap(priv->domain, hw);
+
+                       /*
+                        * Only fire off interrupts that are
+                        * registered to be handled by the kernel.
+                        * Other interrupts are probably being
+                        * handled by other Meta hardware threads.
+                        */
+                       generic_handle_irq(irq_no);
+
+                       /*
+                        * The handler may have re-enabled interrupts
+                        * which could have caused a nested invocation
+                        * of this code and make the copy of the
+                        * status register we are using invalid.
+                        */
+                       goto recalculate;
+               }
+       }
+}
+
+/**
+ * internal_irq_map() - Map an internal meta IRQ to a virtual IRQ number.
+ * @hw:                Number of the internal IRQ. Must be in range.
+ *
+ * Returns:    The virtual IRQ number of the Meta internal IRQ specified by
+ *             @hw.
+ */
+int internal_irq_map(unsigned int hw)
+{
+       struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+       if (!priv->domain)
+               return -ENODEV;
+       return irq_create_mapping(priv->domain, hw);
+}
+
+/**
+ *     metag_internal_irq_init_cpu - register with the Meta cpu
+ *     @cpu:   the CPU to register on
+ *
+ *     Configure @cpu's TR1 irq so that we can demux irqs.
+ */
+static void metag_internal_irq_init_cpu(struct metag_internal_irq_priv *priv,
+                                       int cpu)
+{
+       unsigned int thread = cpu_2_hwthread_id[cpu];
+       unsigned int signum = TBID_SIGNUM_TR1(thread);
+       int irq = tbisig_map(signum);
+
+       /* Register the multiplexed IRQ handler */
+       irq_set_handler_data(irq, priv);
+       irq_set_chained_handler(irq, metag_internal_irq_demux);
+       irq_set_irq_type(irq, IRQ_TYPE_LEVEL_LOW);
+}
+
+/**
+ * metag_internal_intc_map() - map an internal irq
+ * @d:         irq domain of internal trigger block
+ * @irq:       virtual irq number
+ * @hw:                hardware irq number within internal trigger block
+ *
+ * This sets up a virtual irq for a specified hardware interrupt. The irq chip
+ * and handler are configured.
+ */
+static int metag_internal_intc_map(struct irq_domain *d, unsigned int irq,
+                                  irq_hw_number_t hw)
+{
+       /* only register the interrupt if the hardware trigger is supported */
+       if (!metag_hwvec_addr(hw))
+               return -EINVAL;
+
+       irq_set_chip_and_handler(irq, &internal_irq_edge_chip,
+                                handle_edge_irq);
+       return 0;
+}
+
+static const struct irq_domain_ops metag_internal_intc_domain_ops = {
+       .map    = metag_internal_intc_map,
+};
+
+/**
+ *     init_internal_IRQ - register internal IRQs
+ *
+ *     Register the irq chip and handler function for all internal IRQs
+ */
+int __init init_internal_IRQ(void)
+{
+       struct metag_internal_irq_priv *priv = &metag_internal_irq_priv;
+       unsigned int cpu;
+
+       /* Set up an IRQ domain */
+       priv->domain = irq_domain_add_linear(NULL, 32,
+                                            &metag_internal_intc_domain_ops,
+                                            priv);
+       if (unlikely(!priv->domain)) {
+               pr_err("meta-internal-intc: cannot add IRQ domain\n");
+               return -ENOMEM;
+       }
+
+       /* Setup TR1 for all cpus. */
+       for_each_possible_cpu(cpu)
+               metag_internal_irq_init_cpu(priv, cpu);
+
+       return 0;
+}
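
internal_irq_map() above is the public entry point (see include/linux/irqchip/metag.h below) for turning one of the supported HWSTATMETA triggers into a Linux virtual irq. A rough sketch of how a perf-style consumer might use it for the PERF0 trigger; the handler and setup function names are illustrative assumptions, not code from this patch:

    #include <linux/interrupt.h>
    #include <linux/irqchip/metag.h>

    #define EXAMPLE_PERF0TRIG 16    /* PERF0TRIG_OFFSET in the driver above */

    static irqreturn_t example_perf0_isr(int irq, void *dev_id)
    {
            /* Service the overflowed performance counter here. */
            return IRQ_HANDLED;
    }

    static int example_perf0_setup(void)
    {
            int irq = internal_irq_map(EXAMPLE_PERF0TRIG);

            if (irq < 0)
                    return irq;     /* -ENODEV before init, -EINVAL when stubbed out */

            return request_irq(irq, example_perf0_isr, 0, "example-perf0", NULL);
    }
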
index a5702d74d2bdb6529d0dada48a6882329206854f..3939829f6c5cc67e7d0ca1e9518f1df834be5088 100644 (file)
@@ -322,6 +322,8 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
        return 0;
 }
 
+#ifndef elf_map
+
 static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
@@ -356,6 +358,8 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
        return(map_addr);
 }
 
+#endif /* !elf_map */
+
 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
 {
        int i, first_idx = -1, last_idx = -1;
index aba53083297de4a6f9acec799032b897fc9fc235..ac9da00e9f2c66b104029aa7b89253802708a322 100644 (file)
@@ -346,6 +346,7 @@ extern void ioport_unmap(void __iomem *p);
 #define xlate_dev_kmem_ptr(p)  p
 #define xlate_dev_mem_ptr(p)   __va(p)
 
+#ifdef CONFIG_VIRT_TO_BUS
 #ifndef virt_to_bus
 static inline unsigned long virt_to_bus(volatile void *address)
 {
@@ -357,6 +358,7 @@ static inline void *bus_to_virt(unsigned long address)
        return (void *) address;
 }
 #endif
+#endif
 
 #ifndef memset_io
 #define memset_io(a, b, c)     memset(__io_virt(a), (b), (c))
index 257c55ec4f7752e59408ee4d5808d15290cde0fe..4077b5d9ff8184f6a4446273b8fcaed792206dc0 100644 (file)
  * but it doesn't work on all toolchains, so we just do it by hand
  */
 #ifndef cond_syscall
-#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
+#ifdef CONFIG_SYMBOL_PREFIX
+#define __SYMBOL_PREFIX CONFIG_SYMBOL_PREFIX
+#else
+#define __SYMBOL_PREFIX
+#endif
+#define cond_syscall(x) asm(".weak\t" __SYMBOL_PREFIX #x "\n\t" \
+                           ".set\t" __SYMBOL_PREFIX #x "," \
+                           __SYMBOL_PREFIX "sys_ni_syscall")
 #endif
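
For clarity, the new cond_syscall() simply pastes the configured symbol prefix onto both symbol names. Assuming CONFIG_SYMBOL_PREFIX is "_" (as the metag/genksyms changes further down suggest) and an illustrative syscall name sys_example, the macro expands to roughly:

    /* cond_syscall(sys_example) after string pasting: */
    asm(".weak\t_sys_example\n\t"
        ".set\t_sys_example,_sys_ni_syscall");
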
diff --git a/include/clocksource/metag_generic.h b/include/clocksource/metag_generic.h
new file mode 100644 (file)
index 0000000..ac17e7d
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2013 Imagination Technologies Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __CLKSOURCE_METAG_GENERIC_H
+#define __CLKSOURCE_METAG_GENERIC_H
+
+extern int metag_generic_timer_init(void);
+
+#endif /* __CLKSOURCE_METAG_GENERIC_H */
diff --git a/include/linux/irqchip/metag-ext.h b/include/linux/irqchip/metag-ext.h
new file mode 100644 (file)
index 0000000..697af0f
--- /dev/null
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2012 Imagination Technologies
+ */
+
+#ifndef _LINUX_IRQCHIP_METAG_EXT_H_
+#define _LINUX_IRQCHIP_METAG_EXT_H_
+
+struct irq_data;
+struct platform_device;
+
+/* called from core irq code at init */
+int init_external_IRQ(void);
+
+/*
+ * called from SoC init_irq() callback to dynamically indicate the lack of
+ * HWMASKEXT registers.
+ */
+void meta_intc_no_mask(void);
+
+/*
+ * These allow SoCs to specialise the interrupt controller from their init_irq
+ * callbacks.
+ */
+
+extern struct irq_chip meta_intc_edge_chip;
+extern struct irq_chip meta_intc_level_chip;
+
+/* this should be called in the mask callback */
+void meta_intc_mask_irq_simple(struct irq_data *data);
+/* this should be called in the unmask callback */
+void meta_intc_unmask_irq_simple(struct irq_data *data);
+
+#endif /* _LINUX_IRQCHIP_METAG_EXT_H_ */
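
The exported meta_intc_edge_chip / meta_intc_level_chip and the *_irq_simple() helpers above exist so a SoC's init_irq() callback can override individual irq_chip operations while still delegating to the common behaviour. A hedged sketch of what such a specialisation might look like (the example_soc_* names and the extra SoC-specific step are assumptions for illustration):

    #include <linux/init.h>
    #include <linux/irq.h>
    #include <linux/irqchip/metag-ext.h>

    /* Hypothetical SoC mask callback that wraps the simple helper. */
    static void example_soc_mask_irq(struct irq_data *data)
    {
            /* SoC-specific quiescing could go here (illustrative). */
            meta_intc_mask_irq_simple(data);
    }

    static void __init example_soc_irqchip_setup(void)
    {
            meta_intc_edge_chip.irq_mask  = example_soc_mask_irq;
            meta_intc_level_chip.irq_mask = example_soc_mask_irq;
    }
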
diff --git a/include/linux/irqchip/metag.h b/include/linux/irqchip/metag.h
new file mode 100644 (file)
index 0000000..4ebdfb3
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) 2011 Imagination Technologies
+ */
+
+#ifndef _LINUX_IRQCHIP_METAG_H_
+#define _LINUX_IRQCHIP_METAG_H_
+
+#include <linux/errno.h>
+
+#ifdef CONFIG_METAG_PERFCOUNTER_IRQS
+extern int init_internal_IRQ(void);
+extern int internal_irq_map(unsigned int hw);
+#else
+static inline int init_internal_IRQ(void)
+{
+       return 0;
+}
+static inline int internal_irq_map(unsigned int hw)
+{
+       return -EINVAL;
+}
+#endif
+
+#endif /* _LINUX_IRQCHIP_METAG_H_ */
index 1ede55f292c2879e418269acc754c736a56b7656..7acc9dc73c9f272bda990e741041a1b5c1237cc5 100644 (file)
@@ -115,6 +115,8 @@ extern unsigned int kobjsize(const void *objp);
 # define VM_SAO                VM_ARCH_1       /* Strong Access Ordering (powerpc) */
 #elif defined(CONFIG_PARISC)
 # define VM_GROWSUP    VM_ARCH_1
+#elif defined(CONFIG_METAG)
+# define VM_GROWSUP    VM_ARCH_1
 #elif defined(CONFIG_IA64)
 # define VM_GROWSUP    VM_ARCH_1
 #elif !defined(CONFIG_MMU)
index 900b9484445b2e24d0df0ccc1a218484a7af9ca8..8072d352b98f53b1b0ddc08d2e5dc2aa71e416dc 100644 (file)
@@ -395,6 +395,8 @@ typedef struct elf64_shdr {
 #define NT_ARM_TLS     0x401           /* ARM TLS register */
 #define NT_ARM_HW_BREAK        0x402           /* ARM hardware breakpoint registers */
 #define NT_ARM_HW_WATCH        0x403           /* ARM hardware watchpoint registers */
+#define NT_METAG_CBUF  0x500           /* Metag catch buffer registers */
+#define NT_METAG_RPIPE 0x501           /* Metag read pipeline state */
 
 
 /* Note header in a PT_NOTE section */
index 7244acde77b0a9a5670d401fc505c64c5d8b06c0..6989df2ba1947bf58879ecf2d5cc4261c9fdfc2c 100644 (file)
@@ -178,7 +178,7 @@ void tracing_off_permanent(void)
 #define RB_MAX_SMALL_DATA      (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 #define RB_EVNT_MIN_SIZE       8U      /* two 32bit words */
 
-#if !defined(CONFIG_64BIT) || defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
 # define RB_FORCE_8BYTE_ALIGNMENT      0
 # define RB_ARCH_ALIGNMENT             RB_ALIGNMENT
 #else
@@ -186,6 +186,8 @@ void tracing_off_permanent(void)
 # define RB_ARCH_ALIGNMENT             8U
 #endif
 
+#define RB_ALIGN_DATA          __aligned(RB_ARCH_ALIGNMENT)
+
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
@@ -334,7 +336,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 struct buffer_data_page {
        u64              time_stamp;    /* page time stamp */
        local_t          commit;        /* write committed index */
-       unsigned char    data[];        /* data of buffer page */
+       unsigned char    data[] RB_ALIGN_DATA;  /* data of buffer page */
 };
 
 /*
index e4a7f808fa0630b31f5d7ba466e280a5bbb3c217..28be08c09babda27bb77959538c9645962f84d75 100644 (file)
@@ -674,7 +674,7 @@ config STACKTRACE
 
 config DEBUG_STACK_USAGE
        bool "Stack utilization instrumentation"
-       depends on DEBUG_KERNEL && !IA64 && !PARISC
+       depends on DEBUG_KERNEL && !IA64 && !PARISC && !METAG
        help
          Enables the display of the minimum amount of free stack which each
          task has ever had available in the sysrq-T and sysrq-P debug output.
@@ -855,7 +855,7 @@ config FRAME_POINTER
        bool "Compile the kernel with frame pointers"
        depends on DEBUG_KERNEL && \
                (CRIS || M68K || FRV || UML || \
-                AVR32 || SUPERH || BLACKFIN || MN10300) || \
+                AVR32 || SUPERH || BLACKFIN || MN10300 || METAG) || \
                ARCH_WANT_FRAME_POINTERS
        default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS
        help
index 17e38439670520fc580fb23362b7141df1ac334a..544aa56b6200a0610a1b0841ce34ed312955c3f0 100755 (executable)
@@ -34,7 +34,7 @@ use strict;
 # $1 (first bracket) matches the dynamic amount of the stack growth
 #
 # use anything else and feel the pain ;)
-my (@stack, $re, $dre, $x, $xs);
+my (@stack, $re, $dre, $x, $xs, $funcre);
 {
        my $arch = shift;
        if ($arch eq "") {
@@ -44,6 +44,7 @@ my (@stack, $re, $dre, $x, $xs);
 
        $x      = "[0-9a-f]";   # hex character
        $xs     = "[0-9a-f ]";  # hex character or space
+       $funcre = qr/^$x* <(.*)>:$/;
        if ($arch eq 'arm') {
                #c0008ffc:      e24dd064        sub     sp, sp, #100    ; 0x64
                $re = qr/.*sub.*sp, sp, #(([0-9]{2}|[3-9])[0-9]{2})/o;
@@ -66,6 +67,10 @@ my (@stack, $re, $dre, $x, $xs);
                #    2b6c:       4e56 fb70       linkw %fp,#-1168
                #  1df770:       defc ffe4       addaw #-28,%sp
                $re = qr/.*(?:linkw %fp,|addaw )#-([0-9]{1,4})(?:,%sp)?$/o;
+       } elsif ($arch eq 'metag') {
+               #400026fc:       40 00 00 82     ADD       A0StP,A0StP,#0x8
+               $re = qr/.*ADD.*A0StP,A0StP,\#(0x$x{1,8})/o;
+               $funcre = qr/^$x* <[^\$](.*)>:$/;
        } elsif ($arch eq 'mips64') {
                #8800402c:       67bdfff0        daddiu  sp,sp,-16
                $re = qr/.*daddiu.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
@@ -109,7 +114,6 @@ my (@stack, $re, $dre, $x, $xs);
 #
 # main()
 #
-my $funcre = qr/^$x* <(.*)>:$/;
 my ($func, $file, $lastslash);
 
 while (my $line = <STDIN>) {
index 8a106499ec4fd627c183dc12eac1a607f2fabd80..d25e4a118d3783f52d0bfbd3d74c347b7e1d7c08 100644 (file)
@@ -826,7 +826,8 @@ int main(int argc, char **argv)
                        genksyms_usage();
                        return 1;
                }
-       if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0))
+       if ((strcmp(arch, "h8300") == 0) || (strcmp(arch, "blackfin") == 0) ||
+           (strcmp(arch, "metag") == 0))
                mod_prefix = "_";
        {
                extern int yydebug;
index ee52cb8e17adf1d8d842559fb66dbd3ad5c2fe52..9c22317778eb7e3f995c73845cfcceba15062739 100644 (file)
 #include <string.h>
 #include <unistd.h>
 
+#ifndef EM_METAG
+/* Remove this when these make it to the standard system elf.h. */
+#define EM_METAG      174
+#define R_METAG_ADDR32                   2
+#define R_METAG_NONE                     3
+#endif
+
 static int fd_map;     /* File descriptor for file being modified. */
 static int mmap_failed; /* Boolean flag. */
 static void *ehdr_curr; /* current ElfXX_Ehdr *  for resource cleanup */
@@ -341,6 +348,12 @@ do_file(char const *const fname)
                         altmcount = "__gnu_mcount_nc";
                         break;
        case EM_IA_64:   reltype = R_IA64_IMM64;   gpfx = '_'; break;
+       case EM_METAG:   reltype = R_METAG_ADDR32;
+                        altmcount = "_mcount_wrapper";
+                        rel_type_nop = R_METAG_NONE;
+                        /* We happen to have the same requirement as MIPS */
+                        is_fake_mcount32 = MIPS32_is_fake_mcount;
+                        break;
        case EM_MIPS:    /* reltype: e_class    */ gpfx = '_'; break;
        case EM_PPC:     reltype = R_PPC_ADDR32;   gpfx = '_'; break;
        case EM_PPC64:   reltype = R_PPC64_ADDR64; gpfx = '_'; break;
index d5818c98d0511ac01dbd22dfc8adb9dbf6054681..74659ecf93e02a5e70f547834344b783d31e48db 100644 (file)
 #define CPUINFO_PROC   "Processor"
 #endif
 
+#ifdef __metag__
+#define rmb()          asm volatile("" ::: "memory")
+#define cpu_relax()    asm volatile("" ::: "memory")
+#define CPUINFO_PROC   "CPU"
+#endif
+
 #include <time.h>
 #include <unistd.h>
 #include <sys/types.h>