/*
 * Common boot and setup code.
 *
 * Copyright (C) 2001 PPC64 Team, IBM Corp
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/export.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/reboot.h>
#include <linux/delay.h>
#include <linux/initrd.h>
#include <linux/seq_file.h>
#include <linux/ioport.h>
#include <linux/console.h>
#include <linux/utsname.h>
#include <linux/tty.h>
#include <linux/root_dev.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/unistd.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
#include <linux/bootmem.h>
#include <linux/pci.h>
#include <linux/lockdep.h>
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/nmi.h>
#include <asm/io.h>
#include <asm/kdump.h>
#include <asm/prom.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
#include <asm/machdep.h>
#include <asm/paca.h>
#include <asm/time.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/btext.h>
#include <asm/nvram.h>
#include <asm/setup.h>
#include <asm/rtas.h>
#include <asm/iommu.h>
#include <asm/serial.h>
#include <asm/cache.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/xmon.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/code-patching.h>
#include <asm/livepatch.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>
#include <asm/hw_irq.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif
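/*
 * Example: DBG("dt_ptr: 0x%lx\n", dt_ptr) goes to the low-level udbg
 * backend when DEBUG is defined and compiles away to nothing otherwise.
 */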
int spinning_secondaries;
u64 ppc64_pft_size;

struct ppc64_caches ppc64_caches = {
	/* Conservative 64-byte defaults, refined in initialize_cache_info() */
	.l1d = {
		.block_size = 0x40,
		.log_block_size = 6,
	},
	.l1i = {
		.block_size = 0x40,
		.log_block_size = 6,
	},
};
EXPORT_SYMBOL_GPL(ppc64_caches);
#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
void __init setup_tlb_core_data(void)
{
	int cpu;

	BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0);

	for_each_possible_cpu(cpu) {
		int first = cpu_first_thread_sibling(cpu);

		/*
		 * If we boot via kdump on a non-primary thread,
		 * make sure we point at the thread that actually
		 * set up this TLB.
		 */
		if (cpu_first_thread_sibling(boot_cpuid) == first)
			first = boot_cpuid;

		paca[cpu].tcd_ptr = &paca[first].tcd;

		/*
		 * If we have threads, we need either tlbsrx.
		 * or e6500 tablewalk mode, or else TLB handlers
		 * will be racy and could produce duplicate entries.
		 * Should we panic instead?
		 */
		WARN_ONCE(smt_enabled_at_boot >= 2 &&
			  !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
			  book3e_htw_mode != PPC_HTW_E6500,
			  "%s: unsupported MMU configuration\n", __func__);
	}
}
#endif
#ifdef CONFIG_SMP

static char *smt_enabled_cmdline;

/* Look for ibm,smt-enabled OF option */
void __init check_smt_enabled(void)
{
	struct device_node *dn;
	const char *smt_option;

	/* Default to enabling all threads */
	smt_enabled_at_boot = threads_per_core;

	/* Allow the command line to overrule the OF option */
	if (smt_enabled_cmdline) {
		if (!strcmp(smt_enabled_cmdline, "on"))
			smt_enabled_at_boot = threads_per_core;
		else if (!strcmp(smt_enabled_cmdline, "off"))
			smt_enabled_at_boot = 0;
		else {
			int smt;
			int rc;

			rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
			if (!rc)
				smt_enabled_at_boot =
					min(threads_per_core, smt);
		}
	} else {
		dn = of_find_node_by_path("/options");
		if (dn) {
			smt_option = of_get_property(dn,
						     "ibm,smt-enabled", NULL);
			if (smt_option) {
				if (!strcmp(smt_option, "on"))
					smt_enabled_at_boot = threads_per_core;
				else if (!strcmp(smt_option, "off"))
					smt_enabled_at_boot = 0;
			}

			of_node_put(dn);
		}
	}
}

/* Look for smt-enabled= cmdline option */
static int __init early_smt_enabled(char *p)
{
	smt_enabled_cmdline = p;
	return 0;
}
early_param("smt-enabled", early_smt_enabled);
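/*
 * Example: booting with "smt-enabled=2" limits each core to two
 * threads, "smt-enabled=off" disables SMT entirely, and any numeric
 * value is clamped to threads_per_core by check_smt_enabled().
 */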
#endif /* CONFIG_SMP */
/** Fix up paca fields required for the boot cpu */
static void __init fixup_boot_paca(void)
{
	/* The boot cpu is started */
	get_paca()->cpu_start = 1;
	/* Allow percpu accesses to work until we setup percpu data */
	get_paca()->data_offset = 0;
}
static void __init configure_exceptions(void)
{
	/*
	 * Setup the trampolines from the lowmem exception vectors
	 * to the kdump kernel when not using a relocatable kernel.
	 */
	setup_kdump_trampoline();

	/* Under a PAPR hypervisor, we need hypercalls */
	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
		/* Enable AIL if possible */
		pseries_enable_reloc_on_exc();

		/*
		 * Tell the hypervisor that we want our exceptions to
		 * be taken in little endian mode.
		 *
		 * We don't call this for big endian as our calling convention
		 * makes us always enter in BE, and the call may fail under
		 * some circumstances with kdump.
		 */
#ifdef __LITTLE_ENDIAN__
		pseries_little_endian_exceptions();
#endif
	} else {
		/* Set endian mode using OPAL */
		if (firmware_has_feature(FW_FEATURE_OPAL))
			opal_configure_cores();

		/* AIL on native is done in cpu_ready_for_interrupts() */
	}
}
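/*
 * AIL (Alternate Interrupt Location) makes exceptions be delivered
 * with relocation on at the kernel's virtual vectors; under PAPR the
 * request above goes through the H_SET_MODE hypercall.
 */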
static void cpu_ready_for_interrupts(void)
{
	/*
	 * Enable AIL if supported, and we are in hypervisor mode. This
	 * is called once for every processor.
	 *
	 * If we are not in hypervisor mode the job is done once for
	 * the whole partition in configure_exceptions().
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) &&
	    cpu_has_feature(CPU_FTR_ARCH_207S)) {
		unsigned long lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
	}

	/*
	 * Fixup HFSCR:TM based on CPU features. The bit is set by our
	 * early asm init because at that point we haven't updated our
	 * CPU features from firmware and device-tree. Here we have,
	 * so let's refresh it.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP))
		mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);

	/* Set IR and DR in PACA MSR */
	get_paca()->kernel_msr = MSR_KERNEL;
}
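/*
 * LPCR_AIL_3 selects delivery with relocation on at effective address
 * 0xc000...0000 + vector, so interrupt handlers run with the MMU
 * enabled from their first instruction.
 */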
/*
 * Early initialization entry point. This is called by head.S
 * with MMU translation disabled. We rely on the "feature" of
 * the CPU that ignores the top 2 bits of the address in real
 * mode so we can access kernel globals normally provided we
 * only toy with things in the RMO region. From here, we do
 * some early parsing of the device-tree to set up our MEMBLOCK
 * data structures, and allocate & initialize the hash table
 * and segment tables so we can start running with translation
 * on.
 *
 * It is this function which will call the probe() callback of
 * the various platform types and copy the matching one to the
 * global ppc_md structure. Your platform can eventually do
 * some very early initializations from the probe() routine, but
 * this is not recommended, be very careful as, for example, the
 * device-tree is not accessible via normal means at this point.
 */
void __init early_setup(unsigned long dt_ptr)
{
	static __initdata struct paca_struct boot_paca;

	/* -------- printk is _NOT_ safe to use here ! ------- */

	/* Identify CPU type */
	identify_cpu(0, mfspr(SPRN_PVR));

	/* Assume we're on cpu 0 for now. Don't write to the paca yet! */
	initialise_paca(&boot_paca, 0);
	setup_paca(&boot_paca);
	fixup_boot_paca();

	/* -------- printk is now safe to use ------- */

	/* Enable early debugging if any specified (see udbg.h) */
	udbg_early_init();

	DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);

	/*
	 * Do early initialization using the flattened device
	 * tree, such as retrieving the physical memory map or
	 * calculating/retrieving the hash table size.
	 */
	early_init_devtree(__va(dt_ptr));

	/* Now we know the logical id of our boot cpu, setup the paca. */
	setup_paca(&paca[boot_cpuid]);
	fixup_boot_paca();

	/*
	 * Configure exception handlers. This includes setting up trampolines
	 * if needed, setting exception endian mode, etc...
	 */
	configure_exceptions();

	/* Apply all the dynamic patching */
	apply_feature_fixups();
	setup_feature_keys();

	/* Initialize the hash table or TLB handling */
	early_init_mmu();

	/*
	 * At this point, we can let interrupts switch to virtual mode
	 * (the MMU has been setup), so adjust the MSR in the PACA to
	 * have IR and DR set and enable AIL if it exists
	 */
	cpu_ready_for_interrupts();

	DBG(" <- early_setup()\n");

#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
	/*
	 * This needs to be done *last* (after the above DBG() even)
	 *
	 * Right after we return from this function, we turn on the MMU
	 * which means the real-mode access trick that btext does will
	 * no longer work, it needs to switch to using a real MMU
	 * mapping. This call will ensure that it does
	 */
	btext_map();
#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
}
#ifdef CONFIG_SMP
void early_setup_secondary(void)
{
	/* Mark interrupts disabled in PACA */
	get_paca()->soft_enabled = 0;

	/* Initialize the hash table or TLB handling */
	early_init_mmu_secondary();

	/*
	 * At this point, we can let interrupts switch to virtual mode
	 * (the MMU has been setup), so adjust the MSR in the PACA to
	 * have IR and DR set.
	 */
	cpu_ready_for_interrupts();
}
#endif /* CONFIG_SMP */
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
static bool use_spinloop(void)
{
	if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
		return true;

	/*
	 * When book3e boots from kexec, the ePAPR spin table does
	 * not get used.
	 */
	return of_property_read_bool(of_chosen, "linux,booted-from-kexec");
}

void smp_release_cpus(void)
{
	unsigned long *ptr;
	int i;

	if (!use_spinloop())
		return;

	DBG(" -> smp_release_cpus()\n");

	/* All secondary cpus are spinning on a common spinloop, release them
	 * all now so they can start to spin on their individual paca
	 * spinloops. For non SMP kernels, the secondary cpus never get out
	 * of the common spinloop.
	 */
	ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
			- PHYSICAL_START);
	*ptr = ppc_function_entry(generic_secondary_smp_init);

	/* And wait a bit for them to catch up */
	for (i = 0; i < 100000; i++) {
		mb();
		HMT_low();
		if (spinning_secondaries == 0)
			break;
		udelay(1);
	}
	DBG("spinning_secondaries = %d\n", spinning_secondaries);
	DBG(" <- smp_release_cpus()\n");
}
#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
/*
 * Initialize some remaining members of the ppc64_caches and systemcfg
 * structures (at least until we get rid of them completely). This is
 * mostly some cache information about the CPU that will be used by cache
 * flush routines and/or provided to userland.
 */
static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
			    u32 bsize, u32 sets)
{
	info->size = size;
	info->sets = sets;
	info->line_size = lsize;
	info->block_size = bsize;
	info->log_block_size = __ilog2(bsize);
	if (bsize)
		info->blocks_per_page = PAGE_SIZE / bsize;
	else
		info->blocks_per_page = 0;

	if (sets == 0)
		info->assoc = 0xffff;
	else
		info->assoc = size / (sets * lsize);
}
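/*
 * Worked example: a 64KB (0x10000) d-cache with 128-byte lines and 64
 * sets gives assoc = 0x10000 / (64 * 128) = 8 ways, and with 64K pages
 * blocks_per_page = 0x10000 / 128 = 512.
 */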
static bool __init parse_cache_info(struct device_node *np,
				    bool icache,
				    struct ppc_cache_info *info)
{
	static const char *ipropnames[] __initdata = {
		"i-cache-size",
		"i-cache-sets",
		"i-cache-block-size",
		"i-cache-line-size",
	};
	static const char *dpropnames[] __initdata = {
		"d-cache-size",
		"d-cache-sets",
		"d-cache-block-size",
		"d-cache-line-size",
	};
	const char **propnames = icache ? ipropnames : dpropnames;
	const __be32 *sizep, *lsizep, *bsizep, *setsp;
	u32 size, lsize, bsize, sets;
	bool success = true;

	size = 0;
	sets = -1u;
	lsize = bsize = cur_cpu_spec->dcache_bsize;
	sizep = of_get_property(np, propnames[0], NULL);
	if (sizep != NULL)
		size = be32_to_cpu(*sizep);
	setsp = of_get_property(np, propnames[1], NULL);
	if (setsp != NULL)
		sets = be32_to_cpu(*setsp);
	bsizep = of_get_property(np, propnames[2], NULL);
	lsizep = of_get_property(np, propnames[3], NULL);
	if (bsizep == NULL)
		bsizep = lsizep;
	if (lsizep != NULL)
		lsize = be32_to_cpu(*lsizep);
	if (bsizep != NULL)
		bsize = be32_to_cpu(*bsizep);
	if (sizep == NULL || bsizep == NULL || lsizep == NULL)
		success = false;

	/*
	 * OF is weird .. it represents fully associative caches
	 * as "1 way" which doesn't make much sense and doesn't
	 * leave room for direct mapped. We'll assume that 0
	 * in OF means direct mapped for that reason.
	 */
	if (sets == 1)
		sets = 0;
	else if (sets == 0)
		sets = 1;

	init_cache_info(info, size, lsize, bsize, sets);

	return success;
}
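/*
 * Example: a node with d-cache-sets = 1 (OF speak for fully
 * associative) is stored with sets == 0, so init_cache_info()
 * reports assoc == 0xffff for it.
 */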
void __init initialize_cache_info(void)
{
	struct device_node *cpu = NULL, *l2, *l3 = NULL;
	u32 pvr;

	DBG(" -> initialize_cache_info()\n");

	/*
	 * All shipping POWER8 machines have a firmware bug that
	 * puts incorrect information in the device-tree. This will
	 * be (hopefully) fixed for future chips but for now hard
	 * code the values if we are running on one of these
	 */
	pvr = PVR_VER(mfspr(SPRN_PVR));
	if (pvr == PVR_POWER8 || pvr == PVR_POWER8E ||
	    pvr == PVR_POWER8NVL) {
						/* size    lsize   blk  sets */
		init_cache_info(&ppc64_caches.l1i, 0x8000,   128,  128, 32);
		init_cache_info(&ppc64_caches.l1d, 0x10000,  128,  128, 64);
		init_cache_info(&ppc64_caches.l2,  0x80000,  128,  0,   512);
		init_cache_info(&ppc64_caches.l3,  0x800000, 128,  0,   8192);
	} else
		cpu = of_find_node_by_type(NULL, "cpu");

	/*
	 * We're assuming *all* of the CPUs have the same
	 * d-cache and i-cache sizes... -Peter
	 */
	if (cpu) {
		if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
			DBG("Argh, can't find dcache properties !\n");

		if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
			DBG("Argh, can't find icache properties !\n");

		/*
		 * Try to find the L2 and L3 if any. Assume they are
		 * unified and use the D-side properties.
		 */
		l2 = of_find_next_cache_node(cpu);
		of_node_put(cpu);
		if (l2) {
			parse_cache_info(l2, false, &ppc64_caches.l2);
			l3 = of_find_next_cache_node(l2);
			of_node_put(l2);
		}
		if (l3) {
			parse_cache_info(l3, false, &ppc64_caches.l3);
			of_node_put(l3);
		}
	}

	/* For use by binfmt_elf */
	dcache_bsize = ppc64_caches.l1d.block_size;
	icache_bsize = ppc64_caches.l1i.block_size;

	DBG(" <- initialize_cache_info()\n");
}
/* This returns the limit below which memory accesses to the linear
 * mapping are guaranteed not to cause a TLB or SLB miss. This is
 * used to allocate interrupt or emergency stacks for which our
 * exception entry path doesn't deal with being interrupted.
 */
static __init u64 safe_stack_limit(void)
{
#ifdef CONFIG_PPC_BOOK3E
	/* Freescale BookE bolts the entire linear mapping */
	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
		return linear_map_top;
	/* Other BookE, we assume the first GB is bolted */
	return 1ul << 30;
#else
	/* BookS, the first segment is bolted */
	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
		return 1UL << SID_SHIFT_1T;
	return 1UL << SID_SHIFT;
#endif
}
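/*
 * On Book3S the limit above is the size of one segment: 1UL << 40
 * (1TB) with 1T segments (SID_SHIFT_1T == 40), or 1UL << 28 (256MB)
 * with classic 256MB segments (SID_SHIFT == 28).
 */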
void __init irqstack_early_init(void)
{
	u64 limit = safe_stack_limit();
	unsigned int i;

	/*
	 * Interrupt stacks must be in the first segment since we
	 * cannot afford to take SLB misses on them.
	 */
	for_each_possible_cpu(i) {
		softirq_ctx[i] = (struct thread_info *)
			__va(memblock_alloc_base(THREAD_SIZE,
						 THREAD_SIZE, limit));
		hardirq_ctx[i] = (struct thread_info *)
			__va(memblock_alloc_base(THREAD_SIZE,
						 THREAD_SIZE, limit));
	}
}
#ifdef CONFIG_PPC_BOOK3E
void __init exc_lvl_early_init(void)
{
	unsigned int i;
	unsigned long sp;

	for_each_possible_cpu(i) {
		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
		critirq_ctx[i] = (struct thread_info *)__va(sp);
		paca[i].crit_kstack = __va(sp + THREAD_SIZE);

		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
		dbgirq_ctx[i] = (struct thread_info *)__va(sp);
		paca[i].dbg_kstack = __va(sp + THREAD_SIZE);

		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
		mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
		paca[i].mc_kstack = __va(sp + THREAD_SIZE);
	}

	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
		patch_exception(0x040, exc_debug_debug_book3e);
}
#endif
/*
 * Stack space used when we detect a bad kernel stack pointer, and
 * early in SMP boots before relocation is enabled. Exclusive emergency
 * stack for machine checks.
 */
void __init emergency_stack_init(void)
{
	u64 limit;
	unsigned int i;

	/*
	 * Emergency stacks must be under 256MB, we cannot afford to take
	 * SLB misses on them. The ABI also requires them to be 128-byte
	 * aligned.
	 *
	 * Since we use these as temporary stacks during secondary CPU
	 * bringup, we need to get at them in real mode. This means they
	 * must also be within the RMO region.
	 */
	limit = min(safe_stack_limit(), ppc64_rma_size);

	for_each_possible_cpu(i) {
		struct thread_info *ti;
		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
		klp_init_thread_info(ti);
		paca[i].emergency_sp = (void *)ti + THREAD_SIZE;

#ifdef CONFIG_PPC_BOOK3S_64
		/* emergency stack for NMI exception handling. */
		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
		klp_init_thread_info(ti);
		paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;

		/* emergency stack for machine check exception handling. */
		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
		klp_init_thread_info(ti);
		paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
	}
}
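/*
 * Each *_sp pointer stored in the paca points at the top of its
 * THREAD_SIZE region since stacks grow down; the thread_info sits at
 * the bottom of the same region.
 */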
#ifdef CONFIG_SMP
#define PCPU_DYN_SIZE		()

static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
	return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
				    __pa(MAX_DMA_ADDRESS));
}

static void __init pcpu_fc_free(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	if (cpu_to_node(from) == cpu_to_node(to))
		return LOCAL_DISTANCE;
	else
		return REMOTE_DISTANCE;
}
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

void __init setup_per_cpu_areas(void)
{
	const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
	size_t atom_size;
	unsigned long delta;
	unsigned int cpu;
	int rc;

	/*
	 * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
	 * to group units. For larger mappings, use 1M atom which
	 * should be large enough to contain a number of units.
	 */
	if (mmu_linear_psize == MMU_PAGE_4K)
		atom_size = PAGE_SIZE;
	else
		atom_size = 1 << 20;

	rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
				    pcpu_fc_alloc, pcpu_fc_free);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu) {
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
		paca[cpu].data_offset = __per_cpu_offset[cpu];
	}
}
#endif /* CONFIG_SMP */
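/*
 * Mirroring __per_cpu_offset into paca[cpu].data_offset lets low-level
 * code reach per-cpu data through the paca; until this point
 * data_offset is 0 (see fixup_boot_paca()).
 */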
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
unsigned long memory_block_size_bytes(void)
{
	if (ppc_md.memory_block_size)
		return ppc_md.memory_block_size();

	return MIN_MEMORY_BLOCK_SIZE;
}
#endif
#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif
#ifdef CONFIG_HARDLOCKUP_DETECTOR
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
	return ppc_proc_freq * watchdog_thresh;
}
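/*
 * The period is expressed in processor cycles: e.g. ppc_proc_freq of
 * 3.5 GHz with the default watchdog_thresh of 10 seconds yields
 * 3.5e10 cycles between NMI samples.
 */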
/*
 * The hardlockup detector breaks PMU event based branches and is likely
 * to get false positives in KVM guests, so disable it by default.
 */
static int __init disable_hardlockup_detector(void)
{
	hardlockup_detector_disable();

	return 0;
}
early_initcall(disable_hardlockup_detector);
#endif