git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
[S390] kdump backend code
author: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Mon, 26 Sep 2011 14:42:47 +0000 (16:42 +0200)
committer: Martin Schwidefsky <schwidefsky@de.ibm.com>
Mon, 26 Sep 2011 14:42:46 +0000 (16:42 +0200)
This patch provides the architecture specific part of the s390 kdump
support.

Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
18 files changed:
arch/s390/Kconfig
arch/s390/include/asm/ipl.h
arch/s390/include/asm/kexec.h
arch/s390/include/asm/reset.h
arch/s390/include/asm/setup.h
arch/s390/kernel/Makefile
arch/s390/kernel/base.S
arch/s390/kernel/crash_dump.c [new file with mode: 0644]
arch/s390/kernel/head.S
arch/s390/kernel/head_kdump.S [new file with mode: 0644]
arch/s390/kernel/ipl.c
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/mem_detect.c
arch/s390/kernel/reipl.S
arch/s390/kernel/reipl64.S
arch/s390/kernel/setup.c
arch/s390/mm/vmem.c
drivers/s390/cio/cio.c

index ed5cb5af52816940d45c6610e3e2e0d8c1d60a82..31f422b674b60c684414e5f4556c4eaa7dc5a2f6 100644 (file)
@@ -568,6 +568,16 @@ config KEXEC
          current kernel, and to start another kernel.  It is like a reboot
          but is independent of hardware/microcode support.
 
+config CRASH_DUMP
+       bool "kernel crash dumps"
+       depends on 64BIT
+       help
+         Generate crash dump after being started by kexec.
+         Crash dump kernels are loaded in the main kernel with kexec-tools
+         into a specially reserved region and then later executed after
+         a crash by kdump/kexec.
+         For more details see Documentation/kdump/kdump.txt
+
 config ZFCPDUMP
        def_bool n
        prompt "zfcpdump support"
index 97cc4403fabfe6a0a4ad0b21a4b9b5fa41df6e28..6940abfbe1d93aaab0189bb8e77a0b2b99f8caa7 100644 (file)
@@ -168,5 +168,6 @@ enum diag308_rc {
 
 extern int diag308(unsigned long subcode, void *addr);
 extern void diag308_reset(void);
+extern void store_status(void);
 
 #endif /* _ASM_S390_IPL_H */
index bb729b84a21e4c3c48b3a290676236c12d6a4616..fb1c96fa348cee2ea06cb7f4b002b7a76cb4c4e2 100644 (file)
@@ -30,6 +30,9 @@
 /* Not more than 2GB */
 #define KEXEC_CONTROL_MEMORY_LIMIT (1UL<<31)
 
+/* Maximum address we can use for the crash control pages */
+#define KEXEC_CRASH_CONTROL_MEMORY_LIMIT (-1UL)
+
 /* Allocate one page for the pdp and the second for the code */
 #define KEXEC_CONTROL_PAGE_SIZE 4096
 
index f584f4a525814821ea25a9a061ca5dc0d5e03df6..3d6ad4ad2a3f536a28d72164d43d57b887cb2724 100644 (file)
@@ -17,5 +17,5 @@ struct reset_call {
 
 extern void register_reset_call(struct reset_call *reset);
 extern void unregister_reset_call(struct reset_call *reset);
-extern void s390_reset_system(void);
+extern void s390_reset_system(void (*func)(void *), void *data);
 #endif /* _ASM_S390_RESET_H */
index d5e2ef10537d9a93882e3bc38b857cb2edf4f764..fb90e8af663da76eec9c9d918402972545dd97bb 100644 (file)
 #define IPL_DEVICE        (*(unsigned long *)  (0x10400))
 #define INITRD_START      (*(unsigned long *)  (0x10408))
 #define INITRD_SIZE       (*(unsigned long *)  (0x10410))
+#define OLDMEM_BASE      (*(unsigned long *)  (0x10418))
+#define OLDMEM_SIZE      (*(unsigned long *)  (0x10420))
 #endif /* __s390x__ */
 #define COMMAND_LINE      ((char *)            (0x10480))
 
 #define CHUNK_READ_WRITE 0
 #define CHUNK_READ_ONLY  1
+#define CHUNK_OLDMEM    4
+#define CHUNK_CRASHK    5
 
 struct mem_chunk {
        unsigned long addr;
@@ -48,6 +52,8 @@ extern int memory_end_set;
 extern unsigned long memory_end;
 
 void detect_memory_layout(struct mem_chunk chunk[]);
+void create_mem_hole(struct mem_chunk memory_chunk[], unsigned long addr,
+                    unsigned long size, int type);
 
 #define PRIMARY_SPACE_MODE     0
 #define ACCESS_REGISTER_MODE   1
@@ -106,6 +112,7 @@ extern unsigned int user_mode;
 #endif /* __s390x__ */
 
 #define ZFCPDUMP_HSA_SIZE      (32UL<<20)
+#define ZFCPDUMP_HSA_SIZE_MAX  (64UL<<20)
 
 /*
  * Console mode. Override with conmode=
@@ -138,6 +145,8 @@ extern char kernel_nss_name[];
 #define IPL_DEVICE        0x10400
 #define INITRD_START      0x10408
 #define INITRD_SIZE       0x10410
+#define OLDMEM_BASE      0x10418
+#define OLDMEM_SIZE      0x10420
 #endif /* __s390x__ */
 #define COMMAND_LINE      0x10480
 
index df3732249baafa3f0c30474d26da5243412890d1..dd4f076409190c22a0154e9bac693e7b2a75f3b9 100644 (file)
@@ -48,6 +48,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o)
 obj-$(CONFIG_DYNAMIC_FTRACE)   += ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)  += ftrace.o
+obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
 
 # Kexec part
 S390_KEXEC_OBJS := machine_kexec.o crash.o
index 255435663bf820299734afdce573fa52050e9d87..f8828d38fa6ec62d3ccd82f46c5228a21e519955 100644 (file)
@@ -86,6 +86,8 @@ s390_base_pgm_handler_fn:
 ENTRY(diag308_reset)
        larl    %r4,.Lctlregs           # Save control registers
        stctg   %c0,%c15,0(%r4)
+       larl    %r4,.Lfpctl             # Floating point control register
+       stfpc   0(%r4)
        larl    %r4,.Lrestart_psw       # Setup restart PSW at absolute 0
        lghi    %r3,0
        lg      %r4,0(%r4)              # Save PSW
@@ -99,6 +101,8 @@ ENTRY(diag308_reset)
        sam64                           # Switch to 64 bit addressing mode
        larl    %r4,.Lctlregs           # Restore control registers
        lctlg   %c0,%c15,0(%r4)
+       larl    %r4,.Lfpctl             # Restore floating point ctl register
+       lfpc    0(%r4)
        br      %r14
 .align 16
 .Lrestart_psw:
@@ -110,6 +114,8 @@ ENTRY(diag308_reset)
        .rept   16
        .quad   0
        .endr
+.Lfpctl:
+       .long   0
        .previous
 
 #else /* CONFIG_64BIT */
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
new file mode 100644 (file)
index 0000000..c63dd71
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * S390 kdump implementation
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#include <linux/crash_dump.h>
+#include <asm/lowcore.h>
+
+/*
+ * Copy one page (or a part of it) from "oldmem"
+ *
+ * @pfn:     page frame number of the source, before the swap below is applied
+ * @buf:     destination buffer (user or kernel space, see @userbuf)
+ * @csize:   number of bytes to copy
+ * @offset:  byte offset into the source page
+ * @userbuf: if non-zero, @buf is a user space address
+ *
+ * Returns @csize on success, 0 if @csize is 0, or the negative return code
+ * of the copy routine.
+ *
+ * For the kdump reserved memory this function performs a swap operation:
+ *  - [OLDMEM_BASE, OLDMEM_BASE + OLDMEM_SIZE) is mapped to [0, OLDMEM_SIZE)
+ *  - [0, OLDMEM_SIZE) is mapped to [OLDMEM_BASE, OLDMEM_BASE + OLDMEM_SIZE)
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+                        size_t csize, unsigned long offset, int userbuf)
+{
+       unsigned long src;
+       int rc;
+
+       if (!csize)
+               return 0;
+
+       src = (pfn << PAGE_SHIFT) + offset;
+       if (src < OLDMEM_SIZE)
+               src += OLDMEM_BASE;
+       /* Lower bound is inclusive: OLDMEM_BASE itself belongs to the range */
+       else if (src >= OLDMEM_BASE &&
+                src < OLDMEM_BASE + OLDMEM_SIZE)
+               src -= OLDMEM_BASE;
+       if (userbuf)
+               rc = copy_to_user_real((void __user *) buf, (void *) src,
+                                      csize);
+       else
+               rc = memcpy_real(buf, (void *) src, csize);
+       return rc < 0 ? rc : csize;
+}
index 2d781bab37bbcefd69fd9af74e49a66ac0f42293..a6f0e466648dfa655048705e7e148b2dec3d1f73 100644 (file)
@@ -449,10 +449,22 @@ ENTRY(start)
 #
        .org    0x10000
 ENTRY(startup)
+       j       .Lep_startup_normal
+       .org    0x10008
+       .ascii  "S390EP"
+       .byte   0x00,0x01
+#
+# kdump startup-code at 0x10010, running in 64 bit absolute addressing mode
+#
+       .org    0x10010
+ENTRY(startup_kdump)
+       j       .Lep_startup_kdump
+.Lep_startup_normal:
        basr    %r13,0                  # get base
 .LPG0:
        xc      0x200(256),0x200        # partially clear lowcore
        xc      0x300(256),0x300
+       xc      0xe00(256),0xe00
        stck    __LC_LAST_UPDATE_CLOCK
        spt     5f-.LPG0(%r13)
        mvc     __LC_LAST_UPDATE_TIMER(8),5f-.LPG0(%r13)
@@ -534,6 +546,8 @@ ENTRY(startup)
        .align  8
 5:     .long   0x7fffffff,0xffffffff
 
+#include "head_kdump.S"
+
 #
 # params at 10400 (setup.h)
 #
@@ -541,6 +555,8 @@ ENTRY(startup)
        .long   0,0                     # IPL_DEVICE
        .long   0,0                     # INITRD_START
        .long   0,0                     # INITRD_SIZE
+       .long   0,0                     # OLDMEM_BASE
+       .long   0,0                     # OLDMEM_SIZE
 
        .org    COMMAND_LINE
        .byte   "root=/dev/ram0 ro"
diff --git a/arch/s390/kernel/head_kdump.S b/arch/s390/kernel/head_kdump.S
new file mode 100644 (file)
index 0000000..9d5ed9e
--- /dev/null
@@ -0,0 +1,116 @@
+/*
+ * S390 kdump lowlevel functions (new kernel)
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com>
+ */
+
+#define DATAMOVER_ADDR 0x4000
+#define COPY_PAGE_ADDR 0x6000
+
+#ifdef CONFIG_CRASH_DUMP
+
+#
+# kdump entry (new kernel - not yet relocated)
+#
+# Note: This code has to be position independent
+#
+
+.align 2
+.Lep_startup_kdump:
+       basr    %r13,0
+.Lbase:
+       larl    %r2,.Lbase_addr                 # Check, if we have been
+       lg      %r2,0(%r2)                      # already relocated:
+       clgr    %r2,%r13                        #
+       jne     .Lrelocate                      # No : Start data mover
+       lghi    %r2,0                           # Yes: Start kdump kernel
+       brasl   %r14,startup_kdump_relocated
+
+.Lrelocate:
+       larl    %r4,startup
+       lg      %r2,0x418(%r4)                  # Get kdump base
+       lg      %r3,0x420(%r4)                  # Get kdump size
+
+       larl    %r10,.Lcopy_start               # Source of data mover
+       lghi    %r8,DATAMOVER_ADDR              # Target of data mover
+       mvc     0(256,%r8),0(%r10)              # Copy data mover code
+
+       agr     %r8,%r2                         # Copy data mover to
+       mvc     0(256,%r8),0(%r10)              # reserved mem
+
+       lghi    %r14,DATAMOVER_ADDR             # Jump to copied data mover
+       basr    %r14,%r14
+.Lbase_addr:
+       .quad   .Lbase
+
+#
+# kdump data mover code (runs at address DATAMOVER_ADDR)
+#
+# r2: kdump base address
+# r3: kdump size
+#
+.Lcopy_start:
+       basr    %r13,0                          # Base
+0:
+       lgr     %r11,%r2                        # Save kdump base address
+       lgr     %r12,%r2
+       agr     %r12,%r3                        # Compute kdump end address
+
+       lghi    %r5,0
+       lghi    %r10,COPY_PAGE_ADDR             # Load copy page address
+1:
+       mvc     0(256,%r10),0(%r5)              # Copy old kernel to tmp
+       mvc     0(256,%r5),0(%r11)              # Copy new kernel to old
+       mvc     0(256,%r11),0(%r10)             # Copy tmp to new
+       aghi    %r11,256
+       aghi    %r5,256
+       clgr    %r11,%r12
+       jl      1b
+
+       lg      %r14,.Lstartup_kdump-0b(%r13)
+       basr    %r14,%r14                       # Start relocated kernel
+.Lstartup_kdump:
+       .long   0x00000000,0x00000000 + startup_kdump_relocated
+.Lcopy_end:
+
+#
+# Startup of kdump (relocated new kernel)
+#
+.align 2
+startup_kdump_relocated:
+       basr    %r13,0
+0:
+       mvc     0(8,%r0),.Lrestart_psw-0b(%r13) # Setup restart PSW
+       mvc     464(16,%r0),.Lpgm_psw-0b(%r13)  # Setup pgm check PSW
+       lhi     %r1,1                           # Start new kernel
+       diag    %r1,%r1,0x308                   # with diag 308
+
+.Lno_diag308:                                  # No diag 308
+       sam31                                   # Switch to 31 bit addr mode
+       sr      %r1,%r1                         # Erase register r1
+       sr      %r2,%r2                         # Erase register r2
+       sigp    %r1,%r2,0x12                    # Switch to 31 bit arch mode
+       lpsw    0                               # Start new kernel...
+.align 8
+.Lrestart_psw:
+       .long   0x00080000,0x80000000 + startup
+.Lpgm_psw:
+       .quad   0x0000000180000000,0x0000000000000000 + .Lno_diag308
+#else
+.align 2
+.Lep_startup_kdump:
+#ifdef CONFIG_64BIT
+       larl    %r13,startup_kdump_crash
+       lpswe   0(%r13)
+.align 8
+startup_kdump_crash:
+       .quad   0x0002000080000000,0x0000000000000000 + startup_kdump_crash
+#else
+       basr    %r13,0
+0:     lpsw    startup_kdump_crash-0b(%r13)
+.align 8
+startup_kdump_crash:
+       .long   0x000a0000,0x00000000 + startup_kdump_crash
+#endif /* CONFIG_64BIT */
+#endif /* CONFIG_CRASH_DUMP */
index 90769b4bc7f643063075fbde1f38566cd5f955fb..ca0520c52547f0f70c4a7be21bd7f6e559dbcfae 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/ctype.h>
 #include <linux/fs.h>
 #include <linux/gfp.h>
+#include <linux/crash_dump.h>
 #include <asm/ipl.h>
 #include <asm/smp.h>
 #include <asm/setup.h>
@@ -1740,6 +1741,9 @@ void do_restart(void)
 {
        smp_restart_with_online_cpu();
        smp_send_stop();
+#ifdef CONFIG_CRASH_DUMP
+       crash_kexec(NULL);
+#endif
        on_restart_trigger.action->fn(&on_restart_trigger);
        stop_run(&on_restart_trigger);
 }
@@ -2010,7 +2014,7 @@ static void do_reset_calls(void)
 
 u32 dump_prefix_page;
 
-void s390_reset_system(void)
+void s390_reset_system(void (*func)(void *), void *data)
 {
        struct _lowcore *lc;
 
@@ -2038,6 +2042,10 @@ void s390_reset_system(void)
        S390_lowcore.program_new_psw.addr =
                PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
 
+       /* Store status at absolute zero */
+       store_status();
+
        do_reset_calls();
+       if (func)
+               func(data);
 }
-
index b09b9c62573e7bb77c145670dbd3e0e777ea8cdd..7cbac50c956476209de062b065e9ee945e08f069 100644 (file)
@@ -1,10 +1,11 @@
 /*
  * arch/s390/kernel/machine_kexec.c
  *
- * Copyright IBM Corp. 2005,2006
+ * Copyright IBM Corp. 2005,2011
  *
  * Author(s): Rolf Adelsberger,
  *           Heiko Carstens <heiko.carstens@de.ibm.com>
+ *           Michael Holzheu <holzheu@linux.vnet.ibm.com>
  */
 
 #include <linux/device.h>
 #include <asm/smp.h>
 #include <asm/reset.h>
 #include <asm/ipl.h>
+#include <asm/diag.h>
 
 typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
 
 extern const unsigned char relocate_kernel[];
 extern const unsigned long long relocate_kernel_len;
 
+#ifdef CONFIG_CRASH_DUMP
+
+#define ROUNDUP(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
+#define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
+
+#ifndef NT_FPREGSET
+#define NT_FPREGSET 2
+#endif
+
+/*
+ * fpregset ELF Note
+ *
+ * Descriptor layout of the NT_FPREGSET note written by nt_fpregset().
+ */
+struct nt_fpregset_64 {
+       u32     fpc;            /* filled from save_area.fp_ctrl_reg */
+       u32     pad;            /* left zero by nt_fpregset() */
+       u64     fprs[16];       /* filled from save_area.fp_regs */
+} __packed;
+
+/*
+ * Initialize ELF note
+ *
+ * Writes a note header, the NUL terminated @name and @d_len bytes of @desc
+ * to @buf. Name and descriptor are each padded to a multiple of 4 bytes.
+ * Returns the address directly behind the padded descriptor.
+ */
+static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len,
+                    const char *name)
+{
+       Elf64_Nhdr *hdr = buf;
+       u64 name_off, desc_off, end_off;
+
+       hdr->n_namesz = strlen(name) + 1;
+       hdr->n_descsz = d_len;
+       hdr->n_type = type;
+
+       /* Layout: header | name (padded) | descriptor (padded) */
+       name_off = sizeof(Elf64_Nhdr);
+       desc_off = ROUNDUP(name_off + hdr->n_namesz, 4);
+       end_off = ROUNDUP(desc_off + hdr->n_descsz, 4);
+
+       memcpy(PTR_ADD(buf, name_off), name, hdr->n_namesz);
+       memcpy(PTR_ADD(buf, desc_off), desc, hdr->n_descsz);
+
+       return PTR_ADD(buf, end_off);
+}
+
+/*
+ * Initialize prstatus note
+ *
+ * Builds a struct elf_prstatus from the general purpose registers, the PSW
+ * and the access registers found in @sa and writes it as a "CORE" note of
+ * type NT_PRSTATUS to @ptr. pr_pid is taken from a function-static counter
+ * that starts at 1 and is incremented on every call.
+ *
+ * Returns the address behind the written note (see nt_init()).
+ */
+static void *nt_prstatus(void *ptr, struct save_area *sa)
+{
+       struct elf_prstatus nt_prstatus;
+       static int cpu_nr = 1;
+
+       memset(&nt_prstatus, 0, sizeof(nt_prstatus));
+       memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs));
+       memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw));
+       memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs));
+       nt_prstatus.pr_pid = cpu_nr;
+       cpu_nr++;
+
+       return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus),
+                        "CORE");
+}
+
+/*
+ * Initialize fpregset (floating point) note
+ *
+ * Copies the floating point control register and the floating point
+ * registers from @sa into a zeroed struct nt_fpregset_64 and writes it as
+ * a "CORE" note of type NT_FPREGSET to @ptr.
+ *
+ * Returns the address behind the written note (see nt_init()).
+ */
+static void *nt_fpregset(void *ptr, struct save_area *sa)
+{
+       struct nt_fpregset_64 nt_fpregset;
+
+       memset(&nt_fpregset, 0, sizeof(nt_fpregset));
+       memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg));
+       memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs));
+
+       return nt_init(ptr, NT_FPREGSET, &nt_fpregset, sizeof(nt_fpregset),
+                        "CORE");
+}
+
+/*
+ * Initialize timer note
+ *
+ * Writes sa->timer as a note of type NT_S390_TIMER with the name
+ * KEXEC_CORE_NOTE_NAME to @ptr and returns the address behind the note.
+ */
+static void *nt_s390_timer(void *ptr, struct save_area *sa)
+{
+       return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer),
+                        KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize TOD clock comparator note
+ *
+ * Writes sa->clk_cmp as a note of type NT_S390_TODCMP with the name
+ * KEXEC_CORE_NOTE_NAME to @ptr and returns the address behind the note.
+ */
+static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa)
+{
+       return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp,
+                      sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize TOD programmable register note
+ *
+ * Writes sa->tod_reg as a note of type NT_S390_TODPREG with the name
+ * KEXEC_CORE_NOTE_NAME to @ptr and returns the address behind the note.
+ */
+static void *nt_s390_tod_preg(void *ptr, struct save_area *sa)
+{
+       return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg,
+                      sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize control register note
+ *
+ * Writes sa->ctrl_regs as a note of type NT_S390_CTRS with the name
+ * KEXEC_CORE_NOTE_NAME to @ptr and returns the address behind the note.
+ */
+static void *nt_s390_ctrs(void *ptr, struct save_area *sa)
+{
+       return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs,
+                      sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize prefix register note
+ *
+ * Writes sa->pref_reg as a note of type NT_S390_PREFIX with the name
+ * KEXEC_CORE_NOTE_NAME to @ptr and returns the address behind the note.
+ */
+static void *nt_s390_prefix(void *ptr, struct save_area *sa)
+{
+       return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg,
+                        sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Final empty note: clears one struct elf_note at @ptr. add_elf_notes()
+ * calls this after the last note of a CPU has been written.
+ */
+static void nt_final(void *ptr)
+{
+       memset(ptr, 0, sizeof(struct elf_note));
+}
+
+/*
+ * Create ELF notes for CPU
+ *
+ * @cpu: logical CPU whose per-cpu crash_notes buffer is filled
+ *
+ * The register contents are read from the save area located at offset 4608
+ * from the value returned by store_prefix(). Before the notes are built,
+ * that save area is copied to offset 4608 from the prefix stored in
+ * sa->pref_reg. The notes are written back to back and closed with an
+ * empty note.
+ */
+static void add_elf_notes(int cpu)
+{
+       struct save_area *sa = (void *) 4608 + store_prefix();
+       void *ptr;
+
+       memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa));
+       ptr = (u64 *) per_cpu_ptr(crash_notes, cpu);
+       ptr = nt_prstatus(ptr, sa);
+       ptr = nt_fpregset(ptr, sa);
+       ptr = nt_s390_timer(ptr, sa);
+       ptr = nt_s390_tod_cmp(ptr, sa);
+       ptr = nt_s390_tod_preg(ptr, sa);
+       ptr = nt_s390_ctrs(ptr, sa);
+       ptr = nt_s390_prefix(ptr, sa);
+       nt_final(ptr);
+}
+
+/*
+ * Store status of next available physical CPU
+ *
+ * @start_cpu: first CPU address to try
+ * @this_cpu:  CPU address that is skipped (setup_regs() passes the result
+ *             of stap())
+ *
+ * Issues sigp_stop_and_store_status to every CPU address in
+ * [start_cpu, 65536), repeating the order while the result is sigp_busy.
+ * Addresses for which the order is not accepted are skipped.
+ *
+ * Returns the first CPU address for which the order was accepted and the
+ * save area afterwards holds a non-zero prefix register, or -1 if there is
+ * no such CPU.
+ */
+static int store_status_next(int start_cpu, int this_cpu)
+{
+       struct save_area *sa = (void *) 4608 + store_prefix();
+       int cpu, rc;
+
+       for (cpu = start_cpu; cpu < 65536; cpu++) {
+               if (cpu == this_cpu)
+                       continue;
+               do {
+                       rc = raw_sigp(cpu, sigp_stop_and_store_status);
+               } while (rc == sigp_busy);
+               if (rc != sigp_order_code_accepted)
+                       continue;
+               if (sa->pref_reg)
+                       return cpu;
+       }
+       return -1;
+}
+
+/*
+ * Initialize CPU ELF notes
+ *
+ * Walks all online CPUs and creates one set of ELF notes per CPU. If
+ * S390_lowcore.prefixreg_save_area is non-zero, the notes for the first
+ * online CPU are built from the save area that is already present. For
+ * every other CPU the status of the next physical CPU is stored with
+ * store_status_next() first. The walk ends early as soon as
+ * store_status_next() finds no further CPU.
+ */
+void setup_regs(void)
+{
+       int cpu, this_cpu, phys_cpu = 0, first = 1;
+
+       this_cpu = stap();
+
+       if (!S390_lowcore.prefixreg_save_area)
+               first = 0;
+       for_each_online_cpu(cpu) {
+               if (first) {
+                       add_elf_notes(cpu);
+                       first = 0;
+                       continue;
+               }
+               phys_cpu = store_status_next(phys_cpu, this_cpu);
+               if (phys_cpu == -1)
+                       return;
+               add_elf_notes(cpu);
+               phys_cpu++;
+       }
+}
+
+#endif
+
+/*
+ * Start kdump: We expect here that a store status has been done on our CPU
+ *
+ * @image: pointer to the struct kimage of the loaded crash kernel
+ *
+ * Loads a new PSW mask, collects the CPU register notes with setup_regs()
+ * and then calls the entry point image->start with parameter "1". Without
+ * CONFIG_CRASH_DUMP this function does nothing.
+ */
+static void __do_machine_kdump(void *image)
+{
+#ifdef CONFIG_CRASH_DUMP
+       int (*start_kdump)(int) = (void *)((struct kimage *) image)->start;
+
+       __load_psw_mask(PSW_BASE_BITS | PSW_DEFAULT_KEY);
+       setup_regs();
+       start_kdump(1);
+#endif
+}
+
+/*
+ * Check if kdump checksums are valid: We call purgatory with parameter "0"
+ *
+ * The entry point image->start is called with DAT disabled; DAT is enabled
+ * again afterwards.
+ *
+ * Note on the return value: 0 is returned when the entry point returned a
+ * non-zero value, -EINVAL when it returned 0 (and always without
+ * CONFIG_CRASH_DUMP). machine_kexec() uses the result as a boolean and
+ * bails out when it is 0, so -EINVAL effectively means "go ahead".
+ * NOTE(review): presumably purgatory returns 0 for valid checksums -
+ * confirm against the purgatory implementation.
+ */
+static int kdump_csum_valid(struct kimage *image)
+{
+#ifdef CONFIG_CRASH_DUMP
+       int (*start_kdump)(int) = (void *)image->start;
+       int rc;
+
+       __arch_local_irq_stnsm(0xfb); /* disable DAT */
+       rc = start_kdump(0);
+       __arch_local_irq_stosm(0x04); /* enable DAT */
+       return rc ? 0 : -EINVAL;
+#else
+       return -EINVAL;
+#endif
+}
+
+/*
+ * Give back memory to hypervisor before new kdump is loaded
+ *
+ * If MACHINE_IS_VM is set, the start page frame and the size in pages of
+ * the crashkernel region (crashk_res) are handed to diag10_range().
+ *
+ * Returns 0, or -EINVAL if the kernel was built without CONFIG_CRASH_DUMP.
+ */
+static int machine_kexec_prepare_kdump(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+       if (MACHINE_IS_VM)
+               diag10_range(PFN_DOWN(crashk_res.start),
+                            PFN_DOWN(crashk_res.end - crashk_res.start + 1));
+       return 0;
+#else
+       return -EINVAL;
+#endif
+}
+
 int machine_kexec_prepare(struct kimage *image)
 {
        void *reboot_code_buffer;
@@ -35,6 +282,9 @@ int machine_kexec_prepare(struct kimage *image)
        if (ipl_flags & IPL_NSS_VALID)
                return -ENOSYS;
 
+       if (image->type == KEXEC_TYPE_CRASH)
+               return machine_kexec_prepare_kdump();
+
        /* We don't support anything but the default image type for now. */
        if (image->type != KEXEC_TYPE_DEFAULT)
                return -EINVAL;
@@ -55,23 +305,43 @@ void machine_shutdown(void)
 {
 }
 
-static void __machine_kexec(void *data)
+/*
+ * Do normal kexec
+ */
+static void __do_machine_kexec(void *data)
 {
        relocate_kernel_t data_mover;
        struct kimage *image = data;
 
-       pfault_fini();
-       s390_reset_system();
-
        data_mover = (relocate_kernel_t) page_to_phys(image->control_code_page);
 
        /* Call the moving routine */
        (*data_mover)(&image->head, image->start);
-       for (;;);
 }
 
+/*
+ * Reset system and call either kdump or normal kexec
+ *
+ * @data: pointer to the struct kimage that is to be started
+ *
+ * Depending on image->type either __do_machine_kdump() or
+ * __do_machine_kexec() is handed to s390_reset_system() as callback,
+ * together with the image. If s390_reset_system() returns,
+ * disabled_wait() is called with the return address of this function.
+ */
+static void __machine_kexec(void *data)
+{
+       struct kimage *image = data;
+
+       pfault_fini();
+       if (image->type == KEXEC_TYPE_CRASH)
+               s390_reset_system(__do_machine_kdump, data);
+       else
+               s390_reset_system(__do_machine_kexec, data);
+       disabled_wait((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * Do either kdump or normal kexec. In case of kdump we first ask
+ * purgatory, if kdump checksums are valid.
+ */
 void machine_kexec(struct kimage *image)
 {
+       if (image->type == KEXEC_TYPE_CRASH && !kdump_csum_valid(image))
+               return;
        tracer_disable();
        smp_send_stop();
        smp_switch_to_ipl_cpu(__machine_kexec, image);
index 0fbe4e32f7ba298c83b22dfe831b66545fba8ace..19b4568f4ceec4ada5aa86db7251a2b8dc0afa75 100644 (file)
@@ -62,3 +62,72 @@ void detect_memory_layout(struct mem_chunk chunk[])
        arch_local_irq_restore(flags);
 }
 EXPORT_SYMBOL(detect_memory_layout);
+
+/*
+ * Create memory hole with given address, size, and type
+ *
+ * @chunks: memory chunk array with MEMORY_CHUNKS entries, modified in place
+ * @addr:   start address of the hole
+ * @size:   size of the hole
+ * @type:   chunk type that is assigned to the hole
+ *
+ * Every non-empty chunk that overlaps [addr, addr + size) is handled in one
+ * of four ways:
+ *  - hole covers the whole chunk: only the chunk type is changed
+ *  - hole covers the start of the chunk: the chunk is split in two, the
+ *    first part gets @type
+ *  - hole covers the end of the chunk: the chunk is split in two, the
+ *    second part gets @type
+ *  - hole lies inside the chunk: the chunk is split in three, the middle
+ *    part gets @type
+ * For a split the following array entries are moved up by one or two
+ * slots. The function panics if the chunk to be split sits in the last
+ * slot (or one of the last two slots for a three-way split).
+ *
+ * NOTE(review): the entries moved beyond the end of the array by memmove()
+ * are dropped without checking whether they were in use.
+ */
+void create_mem_hole(struct mem_chunk chunks[], unsigned long addr,
+                    unsigned long size, int type)
+{
+       unsigned long start, end, new_size;
+       int i;
+
+       for (i = 0; i < MEMORY_CHUNKS; i++) {
+               if (chunks[i].size == 0)
+                       continue;
+               if (addr + size < chunks[i].addr)
+                       continue;
+               if (addr >= chunks[i].addr + chunks[i].size)
+                       continue;
+               start = max(addr, chunks[i].addr);
+               end = min(addr + size, chunks[i].addr + chunks[i].size);
+               new_size = end - start;
+               if (new_size == 0)
+                       continue;
+               if (start == chunks[i].addr &&
+                   end == chunks[i].addr + chunks[i].size) {
+                       /* Remove chunk */
+                       chunks[i].type = type;
+               } else if (start == chunks[i].addr) {
+                       /* Make chunk smaller at start */
+                       if (i >= MEMORY_CHUNKS - 1)
+                               panic("Unable to create memory hole");
+                       memmove(&chunks[i + 1], &chunks[i],
+                               sizeof(struct mem_chunk) *
+                               (MEMORY_CHUNKS - (i + 1)));
+                       chunks[i + 1].addr = chunks[i].addr + new_size;
+                       chunks[i + 1].size = chunks[i].size - new_size;
+                       chunks[i].size = new_size;
+                       chunks[i].type = type;
+                       i += 1;
+               } else if (end == chunks[i].addr + chunks[i].size) {
+                       /* Make chunk smaller at end */
+                       if (i >= MEMORY_CHUNKS - 1)
+                               panic("Unable to create memory hole");
+                       memmove(&chunks[i + 1], &chunks[i],
+                               sizeof(struct mem_chunk) *
+                               (MEMORY_CHUNKS - (i + 1)));
+                       chunks[i + 1].addr = start;
+                       chunks[i + 1].size = new_size;
+                       chunks[i + 1].type = type;
+                       chunks[i].size -= new_size;
+                       i += 1;
+               } else {
+                       /* Create memory hole */
+                       if (i >= MEMORY_CHUNKS - 2)
+                               panic("Unable to create memory hole");
+                       memmove(&chunks[i + 2], &chunks[i],
+                               sizeof(struct mem_chunk) *
+                               (MEMORY_CHUNKS - (i + 2)));
+                       chunks[i + 1].addr = addr;
+                       chunks[i + 1].size = size;
+                       chunks[i + 1].type = type;
+                       chunks[i + 2].addr = addr + size;
+                       chunks[i + 2].size =
+                               chunks[i].addr + chunks[i].size - (addr + size);
+                       chunks[i + 2].type = chunks[i].type;
+                       chunks[i].size = addr - chunks[i].addr;
+                       i += 2;
+               }
+       }
+}
index 303d961c3bb5ca8ac7b5a4f2eac9395dc9cf8d5d..ad67c214be047df4c6ddb02f65b0b645de152258 100644 (file)
@@ -9,6 +9,12 @@
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 
+#
+# store_status: Empty implementation until kdump is supported on 31 bit
+#
+ENTRY(store_status)
+               br      %r14
+
 #
 # do_reipl_asm
 # Parameter: r2 = schid of reipl device
index e690975403f43c9a8ac9d5dd2e8fa0ae3429722a..a0f5b686a3cd67b8226eeebd452e45a509b6e05e 100644 (file)
@@ -62,8 +62,11 @@ ENTRY(store_status)
        larl    %r2,store_status
        stg     %r2,__LC_PSW_SAVE_AREA-SAVE_AREA_BASE + 8(%r1)
        br      %r14
-.align 8
+
+       .section .bss
+       .align  8
 .Lclkcmp:      .quad   0x0000000000000000
+       .previous
 
 #
 # do_reipl_asm
index 7b371c37061de424892068ea7838a946f4f95e14..9d9ba728cda174ec1ded6d5964428bb98b54ce2a 100644 (file)
@@ -42,6 +42,9 @@
 #include <linux/reboot.h>
 #include <linux/topology.h>
 #include <linux/ftrace.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
+#include <linux/memory.h>
 
 #include <asm/ipl.h>
 #include <asm/uaccess.h>
@@ -57,6 +60,7 @@
 #include <asm/ebcdic.h>
 #include <asm/compat.h>
 #include <asm/kvm_virtio.h>
+#include <asm/diag.h>
 
 long psw_kernel_bits   = (PSW_BASE_BITS | PSW_MASK_DAT | PSW_ASC_PRIMARY |
                           PSW_MASK_MCHECK | PSW_DEFAULT_KEY);
@@ -435,6 +439,9 @@ static void __init setup_resources(void)
        for (i = 0; i < MEMORY_CHUNKS; i++) {
                if (!memory_chunk[i].size)
                        continue;
+               if (memory_chunk[i].type == CHUNK_OLDMEM ||
+                   memory_chunk[i].type == CHUNK_CRASHK)
+                       continue;
                res = alloc_bootmem_low(sizeof(*res));
                res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
                switch (memory_chunk[i].type) {
@@ -479,6 +486,7 @@ static void __init setup_memory_end(void)
        unsigned long max_mem;
        int i;
 
+
 #ifdef CONFIG_ZFCPDUMP
        if (ipl_info.type == IPL_TYPE_FCP_DUMP) {
                memory_end = ZFCPDUMP_HSA_SIZE;
@@ -550,6 +558,187 @@ static void __init setup_restart_psw(void)
        copy_to_absolute_zero(&S390_lowcore.restart_psw, &psw, sizeof(psw));
 }
 
+#ifdef CONFIG_CRASH_DUMP
+
+/*
+ * Find suitable location for crashkernel memory
+ *
+ * Scans the memory chunks from the highest array index downwards and
+ * places the crashkernel area at the end of the first read-write chunk
+ * that is large enough. A candidate base is rejected if it lies below
+ * crash_size, below ZFCPDUMP_HSA_SIZE_MAX or below the end of the initrd.
+ * In a kdump kernel with matching size, OLDMEM_BASE is reused.
+ *
+ * Returns the base address, or 0 with *msg set to the reason for failure.
+ */
+static unsigned long __init find_crash_base(unsigned long crash_size,
+                                           char **msg)
+{
+       unsigned long candidate;
+       int idx;
+
+       if (crash_size > memory_chunk[0].size) {
+               *msg = "first memory chunk must be at least crashkernel size";
+               return 0;
+       }
+       if (is_kdump_kernel() && (crash_size == OLDMEM_SIZE))
+               return OLDMEM_BASE;
+
+       idx = MEMORY_CHUNKS;
+       while (--idx >= 0) {
+               struct mem_chunk *c = &memory_chunk[idx];
+
+               if (c->size == 0 || c->type != CHUNK_READ_WRITE ||
+                   c->size < crash_size)
+                       continue;
+               candidate = (c->addr + c->size) - crash_size;
+               if (candidate >= crash_size &&
+                   candidate >= ZFCPDUMP_HSA_SIZE_MAX &&
+                   candidate >= (unsigned long) INITRD_START + INITRD_SIZE)
+                       return candidate;
+       }
+       *msg = "no suitable area found";
+       return 0;
+}
+
+/*
+ * Check if crash_base and crash_size is valid
+ *
+ * Returns 0 if the range is usable, otherwise -EINVAL with *msg set to a
+ * description of the problem.
+ */
+static int __init verify_crash_base(unsigned long crash_base,
+                                   unsigned long crash_size,
+                                   char **msg)
+{
+       unsigned long chunk_start, chunk_end;
+       int idx;
+
+       /*
+        * The swap to zero needs at least 'crash_size' bytes of free space
+        * in front of crash_base
+        */
+       if (crash_base < crash_size) {
+               *msg = "crashkernel offset must be greater than size";
+               return -EINVAL;
+       }
+
+       /* The first memory chunk has to hold at least crash_size bytes */
+       if (crash_size > memory_chunk[0].size) {
+               *msg = "first memory chunk must be at least crashkernel size";
+               return -EINVAL;
+       }
+
+       /* Locate the chunk containing crash_base and check that we fit */
+       for (idx = 0; idx < MEMORY_CHUNKS; idx++) {
+               if (memory_chunk[idx].size == 0)
+                       continue;
+               chunk_start = memory_chunk[idx].addr;
+               chunk_end = chunk_start + memory_chunk[idx].size;
+               if (crash_base < chunk_start || crash_base >= chunk_end)
+                       continue;
+               if (crash_base + crash_size <= chunk_end)
+                       return 0;
+               *msg = "selected memory chunk is too small for "
+                       "crashkernel memory";
+               return -EINVAL;
+       }
+       *msg = "invalid memory range specified";
+       return -EINVAL;
+}
+
+/*
+ * Reserve kdump memory by creating a memory hole in the mem_chunk array
+ *
+ * Thin wrapper that applies create_mem_hole() with @addr, @size and @type
+ * to the global memory_chunk array.
+ */
+static void __init reserve_kdump_bootmem(unsigned long addr, unsigned long size,
+                                        int type)
+{
+
+       create_mem_hole(memory_chunk, addr, size, type);
+}
+
+/*
+ * When kdump is enabled, we have to ensure that no memory from
+ * the area [0 - crashkernel memory size] and
+ * [crashk_res.start - crashk_res.end] is set offline.
+ *
+ * Only MEM_GOING_OFFLINE requests are vetoed. All other memory hotplug
+ * actions (for example setting memory online) are always let through.
+ *
+ * @nb:     notifier block (unused)
+ * @action: memory hotplug action
+ * @data:   struct memory_notify describing the affected page range
+ *
+ * Returns NOTIFY_BAD to reject the request, NOTIFY_OK otherwise.
+ */
+static int kdump_mem_notifier(struct notifier_block *nb,
+                             unsigned long action, void *data)
+{
+       struct memory_notify *arg = data;
+
+       if (action != MEM_GOING_OFFLINE)
+               return NOTIFY_OK;
+       /* Range starts inside [0 - crashkernel memory size] */
+       if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
+               return NOTIFY_BAD;
+       /* Range starts behind the crashkernel area */
+       if (arg->start_pfn > PFN_DOWN(crashk_res.end))
+               return NOTIFY_OK;
+       /* Range ends in front of the crashkernel area */
+       if (arg->start_pfn + arg->nr_pages - 1 < PFN_DOWN(crashk_res.start))
+               return NOTIFY_OK;
+       return NOTIFY_BAD;
+}
+
+static struct notifier_block kdump_mem_nb = {
+       .notifier_call = kdump_mem_notifier,
+};
+
+#endif
+
+/*
+ * Make sure that oldmem, where the dump is stored, is protected
+ *
+ * Only does something in a kdump kernel. Two ranges are turned into
+ * CHUNK_OLDMEM holes in the memory chunk array:
+ *  - [OLDMEM_BASE, OLDMEM_BASE + OLDMEM_SIZE)
+ *  - [OLDMEM_SIZE, memory_end)
+ * Afterwards saved_max_pfn is set: if the oldmem area ends exactly at
+ * real_memory_size, it is derived from OLDMEM_BASE, otherwise from
+ * real_memory_size.
+ *
+ * Marked __init because it calls the __init function
+ * reserve_kdump_bootmem().
+ */
+static void __init reserve_oldmem(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+       if (!is_kdump_kernel())
+               return;
+
+       reserve_kdump_bootmem(OLDMEM_BASE, OLDMEM_SIZE, CHUNK_OLDMEM);
+       reserve_kdump_bootmem(OLDMEM_SIZE, memory_end - OLDMEM_SIZE,
+                             CHUNK_OLDMEM);
+       if (OLDMEM_BASE + OLDMEM_SIZE == real_memory_size)
+               saved_max_pfn = PFN_DOWN(OLDMEM_BASE) - 1;
+       else
+               saved_max_pfn = PFN_DOWN(real_memory_size) - 1;
+#endif
+}
+
+/*
+ * Reserve memory for kdump kernel to be loaded with kexec
+ *
+ * Steps:
+ *  - parse the crashkernel parameter from boot_command_line; return if
+ *    parsing fails or the size is 0
+ *  - page align base and size
+ *  - register the memory notifier kdump_mem_nb; return on failure
+ *  - if no base was given, pick one with find_crash_base()
+ *  - validate the range with verify_crash_base()
+ *  - if this is not a kdump kernel and MACHINE_IS_VM is set, hand the
+ *    range to diag10_range()
+ *  - record the range in crashk_res, insert it into iomem_resource and
+ *    create the matching hole in the memory chunk array
+ * When finding or verifying the base fails, the reason is logged and the
+ * memory notifier is unregistered again.
+ */
+static void __init reserve_crashkernel(void)
+{
+#ifdef CONFIG_CRASH_DUMP
+       unsigned long long crash_base, crash_size;
+       char *msg;
+       int rc;
+
+       rc = parse_crashkernel(boot_command_line, memory_end, &crash_size,
+                              &crash_base);
+       if (rc || crash_size == 0)
+               return;
+       crash_base = PAGE_ALIGN(crash_base);
+       crash_size = PAGE_ALIGN(crash_size);
+       if (register_memory_notifier(&kdump_mem_nb))
+               return;
+       if (!crash_base)
+               crash_base = find_crash_base(crash_size, &msg);
+       if (!crash_base) {
+               pr_info("crashkernel reservation failed: %s\n", msg);
+               unregister_memory_notifier(&kdump_mem_nb);
+               return;
+       }
+       if (verify_crash_base(crash_base, crash_size, &msg)) {
+               pr_info("crashkernel reservation failed: %s\n", msg);
+               unregister_memory_notifier(&kdump_mem_nb);
+               return;
+       }
+       if (!is_kdump_kernel() && MACHINE_IS_VM)
+               diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
+       crashk_res.start = crash_base;
+       crashk_res.end = crash_base + crash_size - 1;
+       insert_resource(&iomem_resource, &crashk_res);
+       reserve_kdump_bootmem(crash_base, crash_size, CHUNK_READ_WRITE);
+       pr_info("Reserving %lluMB of memory at %lluMB "
+               "for crashkernel (System RAM: %luMB)\n",
+               crash_size >> 20, crash_base >> 20, memory_end >> 20);
+#endif
+}
+
 static void __init
 setup_memory(void)
 {
@@ -580,6 +769,14 @@ setup_memory(void)
                if (PFN_PHYS(start_pfn) + bmap_size > INITRD_START) {
                        start = PFN_PHYS(start_pfn) + bmap_size + PAGE_SIZE;
 
+#ifdef CONFIG_CRASH_DUMP
+                       if (is_kdump_kernel()) {
+                               /* Move initrd behind kdump oldmem */
+                               if (start + INITRD_SIZE > OLDMEM_BASE &&
+                                   start < OLDMEM_BASE + OLDMEM_SIZE)
+                                       start = OLDMEM_BASE + OLDMEM_SIZE;
+                       }
+#endif
                        if (start + INITRD_SIZE > memory_end) {
                                pr_err("initrd extends beyond end of "
                                       "memory (0x%08lx > 0x%08lx) "
@@ -644,6 +841,15 @@ setup_memory(void)
        reserve_bootmem(start_pfn << PAGE_SHIFT, bootmap_size,
                        BOOTMEM_DEFAULT);
 
+#ifdef CONFIG_CRASH_DUMP
+       if (crashk_res.start)
+               reserve_bootmem(crashk_res.start,
+                               crashk_res.end - crashk_res.start + 1,
+                               BOOTMEM_DEFAULT);
+       if (is_kdump_kernel())
+               reserve_bootmem(elfcorehdr_addr - OLDMEM_BASE,
+                               PAGE_ALIGN(elfcorehdr_size), BOOTMEM_DEFAULT);
+#endif
 #ifdef CONFIG_BLK_DEV_INITRD
        if (INITRD_START && INITRD_SIZE) {
                if (INITRD_START + INITRD_SIZE <= memory_end) {
@@ -812,6 +1018,8 @@ setup_arch(char **cmdline_p)
        setup_ipl();
        setup_memory_end();
        setup_addressing_mode();
+       reserve_oldmem();
+       reserve_crashkernel();
        setup_memory();
        setup_resources();
        setup_restart_psw();
index 781ff51695602cf215bf462fe2c1d9358d26575d..4799383e2df9551c45ad69f08f57455bb9771dc0 100644 (file)
@@ -335,6 +335,9 @@ void __init vmem_map_init(void)
        ro_start = ((unsigned long)&_stext) & PAGE_MASK;
        ro_end = PFN_ALIGN((unsigned long)&_eshared);
        for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+               if (memory_chunk[i].type == CHUNK_CRASHK ||
+                   memory_chunk[i].type == CHUNK_OLDMEM)
+                       continue;
                start = memory_chunk[i].addr;
                end = memory_chunk[i].addr + memory_chunk[i].size;
                if (start >= ro_end || end <= ro_start)
@@ -368,6 +371,9 @@ static int __init vmem_convert_memory_chunk(void)
        for (i = 0; i < MEMORY_CHUNKS; i++) {
                if (!memory_chunk[i].size)
                        continue;
+               if (memory_chunk[i].type == CHUNK_CRASHK ||
+                   memory_chunk[i].type == CHUNK_OLDMEM)
+                       continue;
                seg = kzalloc(sizeof(*seg), GFP_KERNEL);
                if (!seg)
                        panic("Out of memory...\n");
index 5586c1376cb064e562e394f7893da99fa5953faf..dc67c397449e529f7cb377f776df6784d72944c1 100644 (file)
@@ -1069,7 +1069,7 @@ void reipl_ccw_dev(struct ccw_dev_id *devid)
 {
        struct subchannel_id schid;
 
-       s390_reset_system();
+       s390_reset_system(NULL, NULL);
        if (reipl_find_schid(devid, &schid) != 0)
                panic("IPL Device not found\n");
        do_reipl_asm(*((__u32*)&schid));