git.karo-electronics.de Git - mv-sheeva.git/blobdiff - arch/arm/kernel/hw_breakpoint.c
Merge tag 'v2.6.38' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[mv-sheeva.git] / arch / arm / kernel / hw_breakpoint.c
index 21e3a4ab3b8c58047304b694aa8161b9ebbf1c31..44b84fe6e1b0fdc544c6f306ed33384a33734fab 100644 (file)
@@ -24,6 +24,7 @@
 #define pr_fmt(fmt) "hw-breakpoint: " fmt
 
 #include <linux/errno.h>
+#include <linux/hardirq.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/smp.h>
@@ -44,6 +45,7 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]);
 
 /* Number of BRP/WRP registers on this CPU. */
 static int core_num_brps;
+static int core_num_reserved_brps;
 static int core_num_wrps;
 
 /* Debug architecture version. */
@@ -52,87 +54,6 @@ static u8 debug_arch;
 /* Maximum supported watchpoint length. */
 static u8 max_watchpoint_len;
 
-/* Determine number of BRP registers available. */
-static int get_num_brps(void)
-{
-       u32 didr;
-       ARM_DBG_READ(c0, 0, didr);
-       return ((didr >> 24) & 0xf) + 1;
-}
-
-/* Determine number of WRP registers available. */
-static int get_num_wrps(void)
-{
-       /*
-        * FIXME: When a watchpoint fires, the only way to work out which
-        * watchpoint it was is by disassembling the faulting instruction
-        * and working out the address of the memory access.
-        *
-        * Furthermore, we can only do this if the watchpoint was precise
-        * since imprecise watchpoints prevent us from calculating register
-        * based addresses.
-        *
-        * For the time being, we only report 1 watchpoint register so we
-        * always know which watchpoint fired. In the future we can either
-        * add a disassembler and address generation emulator, or we can
-        * insert a check to see if the DFAR is set on watchpoint exception
-        * entry [the ARM ARM states that the DFAR is UNKNOWN, but
-        * experience shows that it is set on some implementations].
-        */
-
-#if 0
-       u32 didr, wrps;
-       ARM_DBG_READ(c0, 0, didr);
-       return ((didr >> 28) & 0xf) + 1;
-#endif
-
-       return 1;
-}
-
-int hw_breakpoint_slots(int type)
-{
-       /*
-        * We can be called early, so don't rely on
-        * our static variables being initialised.
-        */
-       switch (type) {
-       case TYPE_INST:
-               return get_num_brps();
-       case TYPE_DATA:
-               return get_num_wrps();
-       default:
-               pr_warning("unknown slot type: %d\n", type);
-               return 0;
-       }
-}
-
-/* Determine debug architecture. */
-static u8 get_debug_arch(void)
-{
-       u32 didr;
-
-       /* Do we implement the extended CPUID interface? */
-       if (((read_cpuid_id() >> 16) & 0xf) != 0xf) {
-               pr_warning("CPUID feature registers not supported. "
-                               "Assuming v6 debug is present.\n");
-               return ARM_DEBUG_ARCH_V6;
-       }
-
-       ARM_DBG_READ(c0, 0, didr);
-       return (didr >> 16) & 0xf;
-}
-
-/* Does this core support mismatch breakpoints? */
-static int core_has_mismatch_bps(void)
-{
-       return debug_arch >= ARM_DEBUG_ARCH_V7_ECP14 && core_num_brps > 1;
-}
-
-u8 arch_get_debug_arch(void)
-{
-       return debug_arch;
-}
-
 #define READ_WB_REG_CASE(OP2, M, VAL)          \
        case ((OP2 << 4) + M):                  \
                ARM_DBG_READ(c ## M, OP2, VAL); \
@@ -210,6 +131,99 @@ static void write_wb_reg(int n, u32 val)
        isb();
 }
 
+/* Determine debug architecture. */
+static u8 get_debug_arch(void)
+{
+       u32 didr;
+
+       /* Do we implement the extended CPUID interface? */
+       if (WARN_ONCE((((read_cpuid_id() >> 16) & 0xf) != 0xf),
+           "CPUID feature registers not supported. "
+           "Assuming v6 debug is present.\n"))
+               return ARM_DEBUG_ARCH_V6;
+
+       ARM_DBG_READ(c0, 0, didr);
+       return (didr >> 16) & 0xf;
+}
+
+u8 arch_get_debug_arch(void)
+{
+       return debug_arch;
+}
+
+static int debug_arch_supported(void)
+{
+       u8 arch = get_debug_arch();
+       return arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14;
+}
+
+/* Determine number of BRP registers available. */
+static int get_num_brp_resources(void)
+{
+       u32 didr;
+       ARM_DBG_READ(c0, 0, didr);
+       return ((didr >> 24) & 0xf) + 1;
+}
+
+/* Does this core support mismatch breakpoints? */
+static int core_has_mismatch_brps(void)
+{
+       return (get_debug_arch() >= ARM_DEBUG_ARCH_V7_ECP14 &&
+               get_num_brp_resources() > 1);
+}
+
+/* Determine number of usable WRPs available. */
+static int get_num_wrps(void)
+{
+       /*
+        * FIXME: When a watchpoint fires, the only way to work out which
+        * watchpoint it was is by disassembling the faulting instruction
+        * and working out the address of the memory access.
+        *
+        * Furthermore, we can only do this if the watchpoint was precise
+        * since imprecise watchpoints prevent us from calculating register
+        * based addresses.
+        *
+        * Providing we have more than 1 breakpoint register, we only report
+        * a single watchpoint register for the time being. This way, we always
+        * know which watchpoint fired. In the future we can either add a
+        * disassembler and address generation emulator, or we can insert a
+        * check to see if the DFAR is set on watchpoint exception entry
+        * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows
+        * that it is set on some implementations].
+        */
+
+#if 0
+       int wrps;
+       u32 didr;
+       ARM_DBG_READ(c0, 0, didr);
+       wrps = ((didr >> 28) & 0xf) + 1;
+#endif
+       int wrps = 1;
+
+       if (core_has_mismatch_brps() && wrps >= get_num_brp_resources())
+               wrps = get_num_brp_resources() - 1;
+
+       return wrps;
+}
+
+/* We reserve one breakpoint for each watchpoint. */
+static int get_num_reserved_brps(void)
+{
+       if (core_has_mismatch_brps())
+               return get_num_wrps();
+       return 0;
+}
+
+/* Determine number of usable BRPs available. */
+static int get_num_brps(void)
+{
+       int brps = get_num_brp_resources();
+       if (core_has_mismatch_brps())
+               brps -= get_num_reserved_brps();
+       return brps;
+}
+
 /*
  * In order to access the breakpoint/watchpoint control registers,
  * we must be running in debug monitor mode. Unfortunately, we can
@@ -230,8 +244,12 @@ static int enable_monitor_mode(void)
                goto out;
        }
 
+       /* If monitor mode is already enabled, just return. */
+       if (dscr & ARM_DSCR_MDBGEN)
+               goto out;
+
        /* Write to the corresponding DSCR. */
-       switch (debug_arch) {
+       switch (get_debug_arch()) {
        case ARM_DEBUG_ARCH_V6:
        case ARM_DEBUG_ARCH_V6_1:
                ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN));
@@ -246,15 +264,33 @@ static int enable_monitor_mode(void)
 
        /* Check that the write made it through. */
        ARM_DBG_READ(c1, 0, dscr);
-       if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN),
-                               "failed to enable monitor mode.")) {
+       if (!(dscr & ARM_DSCR_MDBGEN))
                ret = -EPERM;
-       }
 
 out:
        return ret;
 }
 
+int hw_breakpoint_slots(int type)
+{
+       if (!debug_arch_supported())
+               return 0;
+
+       /*
+        * We can be called early, so don't rely on
+        * our static variables being initialised.
+        */
+       switch (type) {
+       case TYPE_INST:
+               return get_num_brps();
+       case TYPE_DATA:
+               return get_num_wrps();
+       default:
+               pr_warning("unknown slot type: %d\n", type);
+               return 0;
+       }
+}
+
 /*
  * Check if 8-bit byte-address select is available.
  * This clobbers WRP 0.
@@ -268,9 +304,6 @@ static u8 get_max_wp_len(void)
        if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14)
                goto out;
 
-       if (enable_monitor_mode())
-               goto out;
-
        memset(&ctrl, 0, sizeof(ctrl));
        ctrl.len = ARM_BREAKPOINT_LEN_8;
        ctrl_reg = encode_ctrl_reg(ctrl);
@@ -289,23 +322,6 @@ u8 arch_get_max_wp_len(void)
        return max_watchpoint_len;
 }
 
-/*
- * Handler for reactivating a suspended watchpoint when the single
- * step `mismatch' breakpoint is triggered.
- */
-static void wp_single_step_handler(struct perf_event *bp, int unused,
-                                  struct perf_sample_data *data,
-                                  struct pt_regs *regs)
-{
-       perf_event_enable(counter_arch_bp(bp)->suspended_wp);
-       unregister_hw_breakpoint(bp);
-}
-
-static int bp_is_single_step(struct perf_event *bp)
-{
-       return bp->overflow_handler == wp_single_step_handler;
-}
-
 /*
  * Install a perf counter breakpoint.
  */
@@ -314,30 +330,41 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        struct perf_event **slot, **slots;
        int i, max_slots, ctrl_base, val_base, ret = 0;
+       u32 addr, ctrl;
 
        /* Ensure that we are in monitor mode and halting mode is disabled. */
        ret = enable_monitor_mode();
        if (ret)
                goto out;
 
+       addr = info->address;
+       ctrl = encode_ctrl_reg(info->ctrl) | 0x1;
+
        if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
                /* Breakpoint */
                ctrl_base = ARM_BASE_BCR;
                val_base = ARM_BASE_BVR;
-               slots = __get_cpu_var(bp_on_reg);
-               max_slots = core_num_brps - 1;
-
-               if (bp_is_single_step(bp)) {
-                       info->ctrl.mismatch = 1;
-                       i = max_slots;
-                       slots[i] = bp;
-                       goto setup;
+               slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
+               max_slots = core_num_brps;
+               if (info->step_ctrl.enabled) {
+                       /* Override the breakpoint data with the step data. */
+                       addr = info->trigger & ~0x3;
+                       ctrl = encode_ctrl_reg(info->step_ctrl);
                }
        } else {
                /* Watchpoint */
-               ctrl_base = ARM_BASE_WCR;
-               val_base = ARM_BASE_WVR;
-               slots = __get_cpu_var(wp_on_reg);
+               if (info->step_ctrl.enabled) {
+                       /* Install into the reserved breakpoint region. */
+                       ctrl_base = ARM_BASE_BCR + core_num_brps;
+                       val_base = ARM_BASE_BVR + core_num_brps;
+                       /* Override the watchpoint data with the step data. */
+                       addr = info->trigger & ~0x3;
+                       ctrl = encode_ctrl_reg(info->step_ctrl);
+               } else {
+                       ctrl_base = ARM_BASE_WCR;
+                       val_base = ARM_BASE_WVR;
+               }
+               slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
                max_slots = core_num_wrps;
        }
 
@@ -355,12 +382,11 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
                goto out;
        }
 
-setup:
        /* Setup the address register. */
-       write_wb_reg(val_base + i, info->address);
+       write_wb_reg(val_base + i, addr);
 
        /* Setup the control register. */
-       write_wb_reg(ctrl_base + i, encode_ctrl_reg(info->ctrl) | 0x1);
+       write_wb_reg(ctrl_base + i, ctrl);
 
 out:
        return ret;
@@ -375,18 +401,15 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
        if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) {
                /* Breakpoint */
                base = ARM_BASE_BCR;
-               slots = __get_cpu_var(bp_on_reg);
-               max_slots = core_num_brps - 1;
-
-               if (bp_is_single_step(bp)) {
-                       i = max_slots;
-                       slots[i] = NULL;
-                       goto reset;
-               }
+               slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
+               max_slots = core_num_brps;
        } else {
                /* Watchpoint */
-               base = ARM_BASE_WCR;
-               slots = __get_cpu_var(wp_on_reg);
+               if (info->step_ctrl.enabled)
+                       base = ARM_BASE_BCR + core_num_brps;
+               else
+                       base = ARM_BASE_WCR;
+               slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
                max_slots = core_num_wrps;
        }
 
@@ -403,7 +426,6 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp)
        if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot"))
                return;
 
-reset:
        /* Reset the control register. */
        write_wb_reg(base + i, 0);
 }
@@ -537,12 +559,23 @@ static int arch_build_bp_info(struct perf_event *bp)
                return -EINVAL;
        }
 
+       /*
+        * Breakpoints must be of length 2 (thumb) or 4 (ARM) bytes.
+        * Watchpoints can be of length 1, 2, 4 or 8 bytes if supported
+        * by the hardware and must be aligned to the appropriate number of
+        * bytes.
+        */
+       if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE &&
+           info->ctrl.len != ARM_BREAKPOINT_LEN_2 &&
+           info->ctrl.len != ARM_BREAKPOINT_LEN_4)
+               return -EINVAL;
+
        /* Address */
        info->address = bp->attr.bp_addr;
 
        /* Privilege */
        info->ctrl.privilege = ARM_BREAKPOINT_USER;
-       if (arch_check_bp_in_kernelspace(bp) && !bp_is_single_step(bp))
+       if (arch_check_bp_in_kernelspace(bp))
                info->ctrl.privilege |= ARM_BREAKPOINT_PRIV;
 
        /* Enabled? */
@@ -561,7 +594,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 {
        struct arch_hw_breakpoint *info = counter_arch_bp(bp);
        int ret = 0;
-       u32 bytelen, max_len, offset, alignment_mask = 0x3;
+       u32 offset, alignment_mask = 0x3;
 
        /* Build the arch_hw_breakpoint. */
        ret = arch_build_bp_info(bp);
@@ -571,84 +604,85 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
        /* Check address alignment. */
        if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
                alignment_mask = 0x7;
-       if (info->address & alignment_mask) {
-               /*
-                * Try to fix the alignment. This may result in a length
-                * that is too large, so we must check for that.
-                */
-               bytelen = get_hbp_len(info->ctrl.len);
-               max_len = info->ctrl.type == ARM_BREAKPOINT_EXECUTE ? 4 :
-                               max_watchpoint_len;
-
-               if (max_len >= 8)
-                       offset = info->address & 0x7;
-               else
-                       offset = info->address & 0x3;
-
-               if (bytelen > (1 << ((max_len - (offset + 1)) >> 1))) {
-                       ret = -EFBIG;
-                       goto out;
-               }
-
-               info->ctrl.len <<= offset;
-               info->address &= ~offset;
-
-               pr_debug("breakpoint alignment fixup: length = 0x%x, "
-                       "address = 0x%x\n", info->ctrl.len, info->address);
+       offset = info->address & alignment_mask;
+       switch (offset) {
+       case 0:
+               /* Aligned */
+               break;
+       case 1:
+               /* Allow single byte watchpoint. */
+               if (info->ctrl.len == ARM_BREAKPOINT_LEN_1)
+                       break;
+       case 2:
+               /* Allow halfword watchpoints and breakpoints. */
+               if (info->ctrl.len == ARM_BREAKPOINT_LEN_2)
+                       break;
+       default:
+               ret = -EINVAL;
+               goto out;
        }
 
+       info->address &= ~alignment_mask;
+       info->ctrl.len <<= offset;
+
        /*
         * Currently we rely on an overflow handler to take
         * care of single-stepping the breakpoint when it fires.
         * In the case of userspace breakpoints on a core with V7 debug,
-        * we can use the mismatch feature as a poor-man's hardware single-step.
+        * we can use the mismatch feature as a poor-man's hardware
+        * single-step, but this only works for per-task breakpoints.
         */
        if (WARN_ONCE(!bp->overflow_handler &&
-               (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()),
+               (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps()
+                || !bp->hw.bp_target),
                        "overflow handler required but none found")) {
                ret = -EINVAL;
-               goto out;
        }
 out:
        return ret;
 }
 
-static void update_mismatch_flag(int idx, int flag)
+/*
+ * Enable/disable single-stepping over the breakpoint bp at address addr.
+ */
+static void enable_single_step(struct perf_event *bp, u32 addr)
 {
-       struct perf_event *bp = __get_cpu_var(bp_on_reg[idx]);
-       struct arch_hw_breakpoint *info;
-
-       if (bp == NULL)
-               return;
+       struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-       info = counter_arch_bp(bp);
+       arch_uninstall_hw_breakpoint(bp);
+       info->step_ctrl.mismatch  = 1;
+       info->step_ctrl.len       = ARM_BREAKPOINT_LEN_4;
+       info->step_ctrl.type      = ARM_BREAKPOINT_EXECUTE;
+       info->step_ctrl.privilege = info->ctrl.privilege;
+       info->step_ctrl.enabled   = 1;
+       info->trigger             = addr;
+       arch_install_hw_breakpoint(bp);
+}
 
-       /* Update the mismatch field to enter/exit `single-step' mode */
-       if (!bp->overflow_handler && info->ctrl.mismatch != flag) {
-               info->ctrl.mismatch = flag;
-               write_wb_reg(ARM_BASE_BCR + idx, encode_ctrl_reg(info->ctrl) | 0x1);
-       }
+static void disable_single_step(struct perf_event *bp)
+{
+       arch_uninstall_hw_breakpoint(bp);
+       counter_arch_bp(bp)->step_ctrl.enabled = 0;
+       arch_install_hw_breakpoint(bp);
 }
 
 static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
 {
        int i;
-       struct perf_event *bp, **slots = __get_cpu_var(wp_on_reg);
+       struct perf_event *wp, **slots;
        struct arch_hw_breakpoint *info;
-       struct perf_event_attr attr;
+
+       slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
 
        /* Without a disassembler, we can only handle 1 watchpoint. */
        BUG_ON(core_num_wrps > 1);
 
-       hw_breakpoint_init(&attr);
-       attr.bp_addr    = regs->ARM_pc & ~0x3;
-       attr.bp_len     = HW_BREAKPOINT_LEN_4;
-       attr.bp_type    = HW_BREAKPOINT_X;
-
        for (i = 0; i < core_num_wrps; ++i) {
                rcu_read_lock();
 
-               if (slots[i] == NULL) {
+               wp = slots[i];
+
+               if (wp == NULL) {
                        rcu_read_unlock();
                        continue;
                }
@@ -658,87 +692,121 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs)
                 * single watchpoint, we can set the trigger to the lowest
                 * possible faulting address.
                 */
-               info = counter_arch_bp(slots[i]);
-               info->trigger = slots[i]->attr.bp_addr;
+               info = counter_arch_bp(wp);
+               info->trigger = wp->attr.bp_addr;
                pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
-               perf_bp_event(slots[i], regs);
+               perf_bp_event(wp, regs);
 
                /*
                 * If no overflow handler is present, insert a temporary
                 * mismatch breakpoint so we can single-step over the
                 * watchpoint trigger.
                 */
-               if (!slots[i]->overflow_handler) {
-                       bp = register_user_hw_breakpoint(&attr,
-                                                        wp_single_step_handler,
-                                                        current);
-                       counter_arch_bp(bp)->suspended_wp = slots[i];
-                       perf_event_disable(slots[i]);
-               }
+               if (!wp->overflow_handler)
+                       enable_single_step(wp, instruction_pointer(regs));
 
                rcu_read_unlock();
        }
 }
 
+static void watchpoint_single_step_handler(unsigned long pc)
+{
+       int i;
+       struct perf_event *wp, **slots;
+       struct arch_hw_breakpoint *info;
+
+       slots = (struct perf_event **)__get_cpu_var(wp_on_reg);
+
+       for (i = 0; i < core_num_reserved_brps; ++i) {
+               rcu_read_lock();
+
+               wp = slots[i];
+
+               if (wp == NULL)
+                       goto unlock;
+
+               info = counter_arch_bp(wp);
+               if (!info->step_ctrl.enabled)
+                       goto unlock;
+
+               /*
+                * Restore the original watchpoint if we've completed the
+                * single-step.
+                */
+               if (info->trigger != pc)
+                       disable_single_step(wp);
+
+unlock:
+               rcu_read_unlock();
+       }
+}
+
 static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs)
 {
        int i;
-       int mismatch;
        u32 ctrl_reg, val, addr;
-       struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg);
+       struct perf_event *bp, **slots;
        struct arch_hw_breakpoint *info;
        struct arch_hw_breakpoint_ctrl ctrl;
 
+       slots = (struct perf_event **)__get_cpu_var(bp_on_reg);
+
        /* The exception entry code places the amended lr in the PC. */
        addr = regs->ARM_pc;
 
+       /* Check the currently installed breakpoints first. */
        for (i = 0; i < core_num_brps; ++i) {
                rcu_read_lock();
 
                bp = slots[i];
 
-               if (bp == NULL) {
-                       rcu_read_unlock();
-                       continue;
-               }
+               if (bp == NULL)
+                       goto unlock;
 
-               mismatch = 0;
+               info = counter_arch_bp(bp);
 
                /* Check if the breakpoint value matches. */
                val = read_wb_reg(ARM_BASE_BVR + i);
                if (val != (addr & ~0x3))
-                       goto unlock;
+                       goto mismatch;
 
                /* Possible match, check the byte address select to confirm. */
                ctrl_reg = read_wb_reg(ARM_BASE_BCR + i);
                decode_ctrl_reg(ctrl_reg, &ctrl);
                if ((1 << (addr & 0x3)) & ctrl.len) {
-                       mismatch = 1;
-                       info = counter_arch_bp(bp);
                        info->trigger = addr;
-               }
-
-unlock:
-               if ((mismatch && !info->ctrl.mismatch) || bp_is_single_step(bp)) {
                        pr_debug("breakpoint fired: address = 0x%x\n", addr);
                        perf_bp_event(bp, regs);
+                       if (!bp->overflow_handler)
+                               enable_single_step(bp, addr);
+                       goto unlock;
                }
 
-               update_mismatch_flag(i, mismatch);
+mismatch:
+               /* If we're stepping a breakpoint, it can now be restored. */
+               if (info->step_ctrl.enabled)
+                       disable_single_step(bp);
+unlock:
                rcu_read_unlock();
        }
+
+       /* Handle any pending watchpoint single-step breakpoints. */
+       watchpoint_single_step_handler(addr);
 }
 
 /*
  * Called from either the Data Abort Handler [watchpoint] or the
- * Prefetch Abort Handler [breakpoint].
+ * Prefetch Abort Handler [breakpoint] with preemption disabled.
  */
 static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
                                 struct pt_regs *regs)
 {
-       int ret = 1; /* Unhandled fault. */
+       int ret = 0;
        u32 dscr;
 
+       /* We must be called with preemption disabled. */
+       WARN_ON(preemptible());
+
        /* We only handle watchpoints and hardware breakpoints. */
        ARM_DBG_READ(c1, 0, dscr);
 
@@ -753,25 +821,60 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr,
                watchpoint_handler(addr, regs);
                break;
        default:
-               goto out;
+               ret = 1; /* Unhandled fault. */
        }
 
-       ret = 0;
-out:
+       /*
+        * Re-enable preemption after it was disabled in the
+        * low-level exception handling code.
+        */
+       preempt_enable();
+
        return ret;
 }
 
 /*
  * One-time initialisation.
  */
-static void __init reset_ctrl_regs(void *unused)
+static void reset_ctrl_regs(void *info)
 {
-       int i;
+       int i, cpu = smp_processor_id();
+       u32 dbg_power;
+       cpumask_t *cpumask = info;
+
+       /*
+        * v7 debug contains save and restore registers so that debug state
+        * can be maintained across low-power modes without leaving the debug
+        * logic powered up. It is IMPLEMENTATION DEFINED whether we can access
+        * the debug registers out of reset, so we must unlock the OS Lock
+        * Access Register to avoid taking undefined instruction exceptions
+        * later on.
+        */
+       if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) {
+               /*
+                * Ensure sticky power-down is clear (i.e. debug logic is
+                * powered up).
+                */
+               asm volatile("mrc p14, 0, %0, c1, c5, 4" : "=r" (dbg_power));
+               if ((dbg_power & 0x1) == 0) {
+                       pr_warning("CPU %d debug is powered down!\n", cpu);
+                       cpumask_or(cpumask, cpumask, cpumask_of(cpu));
+                       return;
+               }
+
+               /*
+                * Unconditionally clear the lock by writing a value
+                * other than 0xC5ACCE55 to the access register.
+                */
+               asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0));
+               isb();
+       }
 
        if (enable_monitor_mode())
                return;
 
-       for (i = 0; i < core_num_brps; ++i) {
+       /* We must also reset any reserved registers. */
+       for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) {
                write_wb_reg(ARM_BASE_BCR + i, 0UL);
                write_wb_reg(ARM_BASE_BVR + i, 0UL);
        }
@@ -782,45 +885,64 @@ static void __init reset_ctrl_regs(void *unused)
        }
 }
 
+static int __cpuinit dbg_reset_notify(struct notifier_block *self,
+                                     unsigned long action, void *cpu)
+{
+       if (action == CPU_ONLINE)
+               smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1);
+       return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata dbg_reset_nb = {
+       .notifier_call = dbg_reset_notify,
+};
+
 static int __init arch_hw_breakpoint_init(void)
 {
-       int ret = 0;
        u32 dscr;
+       cpumask_t cpumask = { CPU_BITS_NONE };
 
        debug_arch = get_debug_arch();
 
-       if (debug_arch > ARM_DEBUG_ARCH_V7_ECP14) {
+       if (!debug_arch_supported()) {
                pr_info("debug architecture 0x%x unsupported.\n", debug_arch);
-               ret = -ENODEV;
-               goto out;
+               return 0;
        }
 
        /* Determine how many BRPs/WRPs are available. */
        core_num_brps = get_num_brps();
+       core_num_reserved_brps = get_num_reserved_brps();
        core_num_wrps = get_num_wrps();
 
        pr_info("found %d breakpoint and %d watchpoint registers.\n",
-                       core_num_brps, core_num_wrps);
+               core_num_brps + core_num_reserved_brps, core_num_wrps);
+
+       if (core_num_reserved_brps)
+               pr_info("%d breakpoint(s) reserved for watchpoint "
+                               "single-step.\n", core_num_reserved_brps);
 
-       if (core_has_mismatch_bps())
-               pr_info("1 breakpoint reserved for watchpoint single-step.\n");
+       /*
+        * Reset the breakpoint resources. We assume that a halting
+        * debugger will leave the world in a nice state for us.
+        */
+       on_each_cpu(reset_ctrl_regs, &cpumask, 1);
+       if (!cpumask_empty(&cpumask)) {
+               core_num_brps = 0;
+               core_num_reserved_brps = 0;
+               core_num_wrps = 0;
+               return 0;
+       }
 
        ARM_DBG_READ(c1, 0, dscr);
        if (dscr & ARM_DSCR_HDBGEN) {
+               max_watchpoint_len = 4;
                pr_warning("halting debug mode enabled. Assuming maximum "
-                               "watchpoint size of 4 bytes.");
+                          "watchpoint size of %u bytes.", max_watchpoint_len);
        } else {
                /* Work out the maximum supported watchpoint length. */
                max_watchpoint_len = get_max_wp_len();
                pr_info("maximum watchpoint size is %u bytes.\n",
                                max_watchpoint_len);
-
-               /*
-                * Reset the breakpoint resources. We assume that a halting
-                * debugger will leave the world in a nice state for us.
-                */
-               smp_call_function(reset_ctrl_regs, NULL, 1);
-               reset_ctrl_regs(NULL);
        }
 
        /* Register debug fault handler. */
@@ -829,8 +951,9 @@ static int __init arch_hw_breakpoint_init(void)
        hook_ifault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT,
                        "breakpoint debug exception");
 
-out:
-       return ret;
+       /* Register hotplug notifier. */
+       register_cpu_notifier(&dbg_reset_nb);
+       return 0;
 }
 arch_initcall(arch_hw_breakpoint_init);