/*
 *
 * Common boot and setup code.
 *
 * Copyright (C) 2001 PPC64 Team, IBM Corp
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define DEBUG

#include <linux/export.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/reboot.h>
#include <linux/delay.h>
#include <linux/initrd.h>
#include <linux/seq_file.h>
#include <linux/ioport.h>
#include <linux/console.h>
#include <linux/utsname.h>
#include <linux/tty.h>
#include <linux/root_dev.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/unistd.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
#include <linux/bootmem.h>
#include <linux/pci.h>
#include <linux/lockdep.h>
#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/nmi.h>

#include <asm/debugfs.h>
#include <asm/io.h>
#include <asm/kdump.h>
#include <asm/prom.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
#include <asm/machdep.h>
#include <asm/paca.h>
#include <asm/time.h>
#include <asm/cputable.h>
#include <asm/dt_cpu_ftrs.h>
#include <asm/sections.h>
#include <asm/btext.h>
#include <asm/nvram.h>
#include <asm/setup.h>
#include <asm/rtas.h>
#include <asm/iommu.h>
#include <asm/serial.h>
#include <asm/cache.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/firmware.h>
#include <asm/xmon.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/code-patching.h>
#include <asm/livepatch.h>
#include <asm/opal.h>
#include <asm/cputhreads.h>

#include "setup.h"

#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
#endif

int spinning_secondaries;
u64 ppc64_pft_size;

struct ppc64_caches ppc64_caches = {
	.l1d = {
		.block_size = 0x40,
		.log_block_size = 6,
	},
	.l1i = {
		.block_size = 0x40,
		.log_block_size = 6
	},
};
EXPORT_SYMBOL_GPL(ppc64_caches);

#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
void __init setup_tlb_core_data(void)
{
	int cpu;

	BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0);

	for_each_possible_cpu(cpu) {
		int first = cpu_first_thread_sibling(cpu);

		/*
		 * If we boot via kdump on a non-primary thread,
		 * make sure we point at the thread that actually
		 * set up this TLB.
		 */
		if (cpu_first_thread_sibling(boot_cpuid) == first)
			first = boot_cpuid;

		paca[cpu].tcd_ptr = &paca[first].tcd;

		/*
		 * If we have threads, we need either tlbsrx.
		 * or e6500 tablewalk mode, or else TLB handlers
		 * will be racy and could produce duplicate entries.
		 * Should we panic instead?
		 */
		WARN_ONCE(smt_enabled_at_boot >= 2 &&
			  !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
			  book3e_htw_mode != PPC_HTW_E6500,
			  "%s: unsupported MMU configuration\n", __func__);
	}
}
#endif

#ifdef CONFIG_SMP

static char *smt_enabled_cmdline;

/* Look for ibm,smt-enabled OF option */
void __init check_smt_enabled(void)
{
	struct device_node *dn;
	const char *smt_option;

	/* Default to enabling all threads */
	smt_enabled_at_boot = threads_per_core;

	/* Allow the command line to overrule the OF option */
	if (smt_enabled_cmdline) {
		if (!strcmp(smt_enabled_cmdline, "on"))
			smt_enabled_at_boot = threads_per_core;
		else if (!strcmp(smt_enabled_cmdline, "off"))
			smt_enabled_at_boot = 0;
		else {
			int smt;
			int rc;

			rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
			if (!rc)
				smt_enabled_at_boot =
					min(threads_per_core, smt);
		}
	} else {
		dn = of_find_node_by_path("/options");
		if (dn) {
			smt_option = of_get_property(dn, "ibm,smt-enabled",
						     NULL);

			if (smt_option) {
				if (!strcmp(smt_option, "on"))
					smt_enabled_at_boot = threads_per_core;
				else if (!strcmp(smt_option, "off"))
					smt_enabled_at_boot = 0;
			}

			of_node_put(dn);
		}
	}
}

/* Look for smt-enabled= cmdline option */
static int __init early_smt_enabled(char *p)
{
	smt_enabled_cmdline = p;
	return 0;
}
early_param("smt-enabled", early_smt_enabled);

#endif /* CONFIG_SMP */

/** Fix up paca fields required for the boot cpu */
static void __init fixup_boot_paca(void)
{
	/* The boot cpu is started */
	get_paca()->cpu_start = 1;
	/* Allow percpu accesses to work until we setup percpu data */
	get_paca()->data_offset = 0;
}

static void __init configure_exceptions(void)
{
	/*
	 * Setup the trampolines from the lowmem exception vectors
	 * to the kdump kernel when not using a relocatable kernel.
	 */
	setup_kdump_trampoline();

	/* Under a PAPR hypervisor, we need hypercalls */
	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
		/* Enable AIL if possible */
		pseries_enable_reloc_on_exc();

		/*
		 * Tell the hypervisor that we want our exceptions to
		 * be taken in little endian mode.
		 *
		 * We don't call this for big endian as our calling convention
		 * makes us always enter in BE, and the call may fail under
		 * some circumstances with kdump.
		 */
#ifdef __LITTLE_ENDIAN__
		pseries_little_endian_exceptions();
#endif
	} else {
		/* Set endian mode using OPAL */
		if (firmware_has_feature(FW_FEATURE_OPAL))
			opal_configure_cores();

		/* AIL on native is done in cpu_ready_for_interrupts() */
	}
}

static void cpu_ready_for_interrupts(void)
{
	/*
	 * Enable AIL if supported, and we are in hypervisor mode. This
	 * is called once for every processor.
	 *
	 * If we are not in hypervisor mode the job is done once for
	 * the whole partition in configure_exceptions().
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) &&
	    cpu_has_feature(CPU_FTR_ARCH_207S)) {
		unsigned long lpcr = mfspr(SPRN_LPCR);
		mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
	}

	/*
	 * Fixup HFSCR:TM based on CPU features. The bit is set by our
	 * early asm init because at that point we haven't updated our
	 * CPU features from firmware and device-tree. Here we have,
	 * so let's do it.
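	 *
	 * (Note: HFSCR is a hypervisor-privileged register, which is why
	 * the CPU_FTR_HVMODE check below guards the mtspr.)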
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP))
		mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);

	/* Set IR and DR in PACA MSR */
	get_paca()->kernel_msr = MSR_KERNEL;
}

/*
 * Early initialization entry point. This is called by head.S
 * with MMU translation disabled. We rely on the "feature" of
 * the CPU that ignores the top 2 bits of the address in real
 * mode so we can access kernel globals normally provided we
 * only toy with things in the RMO region. From here, we do
 * some early parsing of the device-tree to set up our MEMBLOCK
 * data structures, and allocate & initialize the hash table
 * and segment tables so we can start running with translation
 * enabled.
 *
 * It is this function which will call the probe() callback of
 * the various platform types and copy the matching one to the
 * global ppc_md structure. Your platform can eventually do
 * some very early initializations from the probe() routine, but
 * this is not recommended; be very careful as, for example, the
 * device-tree is not accessible via normal means at this point.
 */

void __init early_setup(unsigned long dt_ptr)
{
	static __initdata struct paca_struct boot_paca;

	/* -------- printk is _NOT_ safe to use here ! ------- */

	/* Try new device tree based feature discovery ... */
	if (!dt_cpu_ftrs_init(__va(dt_ptr)))
		/* Otherwise use the old style CPU table */
		identify_cpu(0, mfspr(SPRN_PVR));

	/* Assume we're on cpu 0 for now. Don't write to the paca yet! */
	initialise_paca(&boot_paca, 0);
	setup_paca(&boot_paca);
	fixup_boot_paca();

	/* -------- printk is now safe to use ------- */

	/* Enable early debugging if any specified (see udbg.h) */
	udbg_early_init();

	DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);

	/*
	 * Do early initialization using the flattened device
	 * tree, such as retrieving the physical memory map or
	 * calculating/retrieving the hash table size.
	 */
	early_init_devtree(__va(dt_ptr));

	/* Now we know the logical id of our boot cpu, setup the paca. */
	setup_paca(&paca[boot_cpuid]);
	fixup_boot_paca();

	/*
	 * Configure exception handlers. This includes setting up trampolines
	 * if needed, setting the exception endian mode, etc...
	 */
	configure_exceptions();

	/* Apply all the dynamic patching */
	apply_feature_fixups();
	setup_feature_keys();

	/* Initialize the hash table or TLB handling */
	early_init_mmu();

	/*
	 * After firmware and early platform setup code has set things up,
	 * we note the SPR values for configurable control/performance
	 * registers, and use those as initial defaults.
	 */
	record_spr_defaults();

	/*
	 * At this point, we can let interrupts switch to virtual mode
	 * (the MMU has been setup), so adjust the MSR in the PACA to
	 * have IR and DR set and enable AIL if it exists
	 */
	cpu_ready_for_interrupts();

	DBG(" <- early_setup()\n");

#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
	/*
	 * This needs to be done *last* (after the above DBG() even)
	 *
	 * Right after we return from this function, we turn on the MMU
	 * which means the real-mode access trick that btext does will
	 * no longer work, it needs to switch to using a real MMU
	 * mapping.
	 * This call will ensure that it does.
	 */
	btext_map();
#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
}

#ifdef CONFIG_SMP
void early_setup_secondary(void)
{
	/* Mark interrupts disabled in PACA */
	get_paca()->soft_enabled = 0;

	/* Initialize the hash table or TLB handling */
	early_init_mmu_secondary();

	/*
	 * At this point, we can let interrupts switch to virtual mode
	 * (the MMU has been setup), so adjust the MSR in the PACA to
	 * have IR and DR set.
	 */
	cpu_ready_for_interrupts();
}

#endif /* CONFIG_SMP */

#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
static bool use_spinloop(void)
{
	if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
		/*
		 * See comments in head_64.S -- not all platforms insert
		 * secondaries at __secondary_hold and wait at the spin
		 * loop.
		 */
		if (firmware_has_feature(FW_FEATURE_OPAL))
			return false;
		return true;
	}

	/*
	 * When book3e boots from kexec, the ePAPR spin table does
	 * not get used.
	 */
	return of_property_read_bool(of_chosen, "linux,booted-from-kexec");
}

void smp_release_cpus(void)
{
	unsigned long *ptr;
	int i;

	if (!use_spinloop())
		return;

	DBG(" -> smp_release_cpus()\n");

	/* All secondary cpus are spinning on a common spinloop, release them
	 * all now so they can start to spin on their individual paca
	 * spinloops. For non SMP kernels, the secondary cpus never get out
	 * of the common spinloop.
	 */

	ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
			- PHYSICAL_START);
	*ptr = ppc_function_entry(generic_secondary_smp_init);

	/* And wait a bit for them to catch up */
	for (i = 0; i < 100000; i++) {
		mb();
		HMT_low();
		if (spinning_secondaries == 0)
			break;
		udelay(1);
	}
	DBG("spinning_secondaries = %d\n", spinning_secondaries);

	DBG(" <- smp_release_cpus()\n");
}
#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */

/*
 * Initialize some remaining members of the ppc64_caches and systemcfg
 * structures (at least until we get rid of them completely). This is
 * mostly some cache information about the CPU that will be used by
 * cache flush routines and/or provided to userland.
 */

static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
			    u32 bsize, u32 sets)
{
	info->size = size;
	info->sets = sets;
	info->line_size = lsize;
	info->block_size = bsize;
	info->log_block_size = __ilog2(bsize);
	if (bsize)
		info->blocks_per_page = PAGE_SIZE / bsize;
	else
		info->blocks_per_page = 0;

	if (sets == 0)
		info->assoc = 0xffff;
	else
		info->assoc = size / (sets * lsize);
}

static bool __init parse_cache_info(struct device_node *np,
				    bool icache,
				    struct ppc_cache_info *info)
{
	static const char *ipropnames[] __initdata = {
		"i-cache-size",
		"i-cache-sets",
		"i-cache-block-size",
		"i-cache-line-size",
	};
	static const char *dpropnames[] __initdata = {
		"d-cache-size",
		"d-cache-sets",
		"d-cache-block-size",
		"d-cache-line-size",
	};
	const char **propnames = icache ?
		ipropnames : dpropnames;
	const __be32 *sizep, *lsizep, *bsizep, *setsp;
	u32 size, lsize, bsize, sets;
	bool success = true;

	size = 0;
	sets = -1u;
	lsize = bsize = cur_cpu_spec->dcache_bsize;
	sizep = of_get_property(np, propnames[0], NULL);
	if (sizep != NULL)
		size = be32_to_cpu(*sizep);
	setsp = of_get_property(np, propnames[1], NULL);
	if (setsp != NULL)
		sets = be32_to_cpu(*setsp);
	bsizep = of_get_property(np, propnames[2], NULL);
	lsizep = of_get_property(np, propnames[3], NULL);
	if (bsizep == NULL)
		bsizep = lsizep;
	if (lsizep != NULL)
		lsize = be32_to_cpu(*lsizep);
	if (bsizep != NULL)
		bsize = be32_to_cpu(*bsizep);
	if (sizep == NULL || bsizep == NULL || lsizep == NULL)
		success = false;

	/*
	 * OF is weird .. it represents fully associative caches
	 * as "1 way" which doesn't make much sense and doesn't
	 * leave room for direct mapped. We'll assume that 0
	 * in OF means direct mapped for that reason.
	 */
	if (sets == 1)
		sets = 0;
	else if (sets == 0)
		sets = 1;

	init_cache_info(info, size, lsize, bsize, sets);

	return success;
}

void __init initialize_cache_info(void)
{
	struct device_node *cpu = NULL, *l2, *l3 = NULL;
	u32 pvr;

	DBG(" -> initialize_cache_info()\n");

	/*
	 * All shipping POWER8 machines have a firmware bug that
	 * puts incorrect information in the device-tree. This will
	 * be (hopefully) fixed for future chips but for now hard
	 * code the values if we are running on one of these.
	 */
	pvr = PVR_VER(mfspr(SPRN_PVR));
	if (pvr == PVR_POWER8 || pvr == PVR_POWER8E ||
	    pvr == PVR_POWER8NVL) {
						/* size    lsize   blk  sets */
		init_cache_info(&ppc64_caches.l1i, 0x8000,   128,  128, 32);
		init_cache_info(&ppc64_caches.l1d, 0x10000,  128,  128, 64);
		init_cache_info(&ppc64_caches.l2,  0x80000,  128,  0,   512);
		init_cache_info(&ppc64_caches.l3,  0x800000, 128,  0,   8192);
	} else
		cpu = of_find_node_by_type(NULL, "cpu");

	/*
	 * We're assuming *all* of the CPUs have the same
	 * d-cache and i-cache sizes... -Peter
	 */
	if (cpu) {
		if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
			DBG("Argh, can't find dcache properties !\n");

		if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
			DBG("Argh, can't find icache properties !\n");

		/*
		 * Try to find the L2 and L3 if any. Assume they are
		 * unified and use the D-side properties.
		 */
		l2 = of_find_next_cache_node(cpu);
		of_node_put(cpu);
		if (l2) {
			parse_cache_info(l2, false, &ppc64_caches.l2);
			l3 = of_find_next_cache_node(l2);
			of_node_put(l2);
		}
		if (l3) {
			parse_cache_info(l3, false, &ppc64_caches.l3);
			of_node_put(l3);
		}
	}

	/* For use by binfmt_elf */
	dcache_bsize = ppc64_caches.l1d.block_size;
	icache_bsize = ppc64_caches.l1i.block_size;

	cur_cpu_spec->dcache_bsize = dcache_bsize;
	cur_cpu_spec->icache_bsize = icache_bsize;

	DBG(" <- initialize_cache_info()\n");
}

/*
 * This returns the limit below which memory accesses to the linear
 * mapping are guaranteed not to cause a TLB or SLB miss. This is
 * used to allocate interrupt or emergency stacks for which our
 * exception entry path doesn't deal with being interrupted.
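 *
 * For example, with 256MB hash segments the limit is 1UL << SID_SHIFT
 * (256MB), with 1T segments it is 1UL << SID_SHIFT_1T (1TB), and with
 * radix there is no SLB at all, so the limit is effectively unbounded
 * (ULONG_MAX).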
 */
static __init u64 safe_stack_limit(void)
{
#ifdef CONFIG_PPC_BOOK3E
	/* Freescale BookE bolts the entire linear mapping */
	if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
		return linear_map_top;
	/* Other BookE, we assume the first GB is bolted */
	return 1ul << 30;
#else
	if (early_radix_enabled())
		return ULONG_MAX;

	/* BookS, the first segment is bolted */
	if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
		return 1UL << SID_SHIFT_1T;
	return 1UL << SID_SHIFT;
#endif
}

void __init irqstack_early_init(void)
{
	u64 limit = safe_stack_limit();
	unsigned int i;

	/*
	 * Interrupt stacks must be in the first segment since we
	 * cannot afford to take SLB misses on them. They are not
	 * accessed in realmode.
	 */
	for_each_possible_cpu(i) {
		softirq_ctx[i] = (struct thread_info *)
			__va(memblock_alloc_base(THREAD_SIZE,
						 THREAD_SIZE, limit));
		hardirq_ctx[i] = (struct thread_info *)
			__va(memblock_alloc_base(THREAD_SIZE,
						 THREAD_SIZE, limit));
	}
}

#ifdef CONFIG_PPC_BOOK3E
void __init exc_lvl_early_init(void)
{
	unsigned int i;
	unsigned long sp;

	for_each_possible_cpu(i) {
		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
		critirq_ctx[i] = (struct thread_info *)__va(sp);
		paca[i].crit_kstack = __va(sp + THREAD_SIZE);

		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
		dbgirq_ctx[i] = (struct thread_info *)__va(sp);
		paca[i].dbg_kstack = __va(sp + THREAD_SIZE);

		sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
		mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
		paca[i].mc_kstack = __va(sp + THREAD_SIZE);
	}

	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
		patch_exception(0x040, exc_debug_debug_book3e);
}
#endif

/*
 * Emergency stacks are used for a range of things, from asynchronous
 * NMIs (system reset, machine check) to synchronous, process-context uses.
 * We set preempt_count to zero, even though that isn't necessarily correct. To
 * get the right value we'd need to copy it from the previous thread_info, but
 * doing that might fault, causing more problems.
 * TODO: what to do with accounting?
 */
static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
{
	ti->task = NULL;
	ti->cpu = cpu;
	ti->preempt_count = 0;
	ti->local_flags = 0;
	ti->flags = 0;
	klp_init_thread_info(ti);
}

/*
 * Stack space used when we detect a bad kernel stack pointer, and
 * early in SMP boots before relocation is enabled. Exclusive emergency
 * stack for machine checks.
 */
void __init emergency_stack_init(void)
{
	u64 limit;
	unsigned int i;

	/*
	 * Emergency stacks must be under 256MB, we cannot afford to take
	 * SLB misses on them. The ABI also requires them to be 128-byte
	 * aligned.
	 *
	 * Since we use these as temporary stacks during secondary CPU
	 * bringup, machine check, system reset, and HMI, we need to get
	 * at them in real mode. This means they must also be within the RMO
	 * region.
	 *
	 * The IRQ stacks allocated elsewhere in this file are zeroed and
	 * initialized in kernel/irq.c. These are initialized here in order
	 * to have emergency stacks available as early as possible.
	 */
	limit = min(safe_stack_limit(), ppc64_rma_size);

	for_each_possible_cpu(i) {
		struct thread_info *ti;

		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
		memset(ti, 0, THREAD_SIZE);
		emerg_stack_init_thread_info(ti, i);
		paca[i].emergency_sp = (void *)ti + THREAD_SIZE;

#ifdef CONFIG_PPC_BOOK3S_64
		/* emergency stack for NMI exception handling. */
		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
		memset(ti, 0, THREAD_SIZE);
		emerg_stack_init_thread_info(ti, i);
		paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;

		/* emergency stack for machine check exception handling. */
		ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
		memset(ti, 0, THREAD_SIZE);
		emerg_stack_init_thread_info(ti, i);
		paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
	}
}

#ifdef CONFIG_SMP
#define PCPU_DYN_SIZE		()

static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
	return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
				    __pa(MAX_DMA_ADDRESS));
}

static void __init pcpu_fc_free(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	if (early_cpu_to_node(from) == early_cpu_to_node(to))
		return LOCAL_DISTANCE;
	else
		return REMOTE_DISTANCE;
}

unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

void __init setup_per_cpu_areas(void)
{
	const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
	size_t atom_size;
	unsigned long delta;
	unsigned int cpu;
	int rc;

	/*
	 * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
	 * to group units. For larger mappings, use 1M atom which
	 * should be large enough to contain a number of units.
	 */
	if (mmu_linear_psize == MMU_PAGE_4K)
		atom_size = PAGE_SIZE;
	else
		atom_size = 1 << 20;

	rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
				    pcpu_fc_alloc, pcpu_fc_free);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu) {
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
		paca[cpu].data_offset = __per_cpu_offset[cpu];
	}
}
#endif

#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
unsigned long memory_block_size_bytes(void)
{
	if (ppc_md.memory_block_size)
		return ppc_md.memory_block_size();

	return MIN_MEMORY_BLOCK_SIZE;
}
#endif

#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif

#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
	return ppc_proc_freq * watchdog_thresh;
}
#endif

/*
 * The perf based hardlockup detector breaks PMU event based branches, so
 * disable it by default. Book3S has a soft-nmi hardlockup detector based
 * on the decrementer interrupt, so it does not suffer from this problem.
 *
 * It is likely to get false positives in VM guests, so disable it there
 * by default too.
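 *
 * (Concretely: when CONFIG_HARDLOCKUP_DETECTOR_PERF is enabled the
 * detector is disabled unconditionally below; otherwise it is only
 * disabled when running under an LPAR/guest.)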
 */
static int __init disable_hardlockup_detector(void)
{
#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
	hardlockup_detector_disable();
#else
	if (firmware_has_feature(FW_FEATURE_LPAR))
		hardlockup_detector_disable();
#endif

	return 0;
}
early_initcall(disable_hardlockup_detector);

#ifdef CONFIG_PPC_BOOK3S_64
static enum l1d_flush_type enabled_flush_types;
static void *l1d_flush_fallback_area;
static bool no_rfi_flush;
bool rfi_flush;

static int __init handle_no_rfi_flush(char *p)
{
	pr_info("rfi-flush: disabled on command line.");
	no_rfi_flush = true;
	return 0;
}
early_param("no_rfi_flush", handle_no_rfi_flush);

/*
 * The RFI flush is not KPTI, but because users will see doco that says to use
 * nopti we hijack that option here to also disable the RFI flush.
 */
static int __init handle_no_pti(char *p)
{
	pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
	handle_no_rfi_flush(NULL);
	return 0;
}
early_param("nopti", handle_no_pti);

static void do_nothing(void *unused)
{
	/*
	 * We don't need to do the flush explicitly, just enter+exit kernel is
	 * sufficient, the RFI exit handlers will do the right thing.
	 */
}

void rfi_flush_enable(bool enable)
{
	if (rfi_flush == enable)
		return;

	if (enable) {
		do_rfi_flush_fixups(enabled_flush_types);
		on_each_cpu(do_nothing, NULL, 1);
	} else
		do_rfi_flush_fixups(L1D_FLUSH_NONE);

	rfi_flush = enable;
}

static void init_fallback_flush(void)
{
	u64 l1d_size, limit;
	int cpu;

	l1d_size = ppc64_caches.l1d.size;
	limit = min(safe_stack_limit(), ppc64_rma_size);

	/*
	 * Align to L1d size, and size it at 2x L1d size, to catch possible
	 * hardware prefetch runoff. We don't have a recipe for load patterns to
	 * reliably avoid the prefetcher.
	 */
	l1d_flush_fallback_area = __va(memblock_alloc_base(l1d_size * 2,
							   l1d_size, limit));
	memset(l1d_flush_fallback_area, 0, l1d_size * 2);

	for_each_possible_cpu(cpu) {
		/*
		 * The fallback flush is currently coded for 8-way
		 * associativity. Different associativity is possible, but it
		 * will be treated as 8-way and may not evict the lines as
		 * effectively.
		 *
		 * 128 byte lines are mandatory.
		 */
		u64 c = l1d_size / 8;

		paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area;
		paca[cpu].l1d_flush_congruence = c;
		paca[cpu].l1d_flush_sets = c / 128;
	}
}

void __init setup_rfi_flush(enum l1d_flush_type types, bool enable)
{
	if (types & L1D_FLUSH_FALLBACK) {
		pr_info("rfi-flush: Using fallback displacement flush\n");
		init_fallback_flush();
	}

	if (types & L1D_FLUSH_ORI)
		pr_info("rfi-flush: Using ori type flush\n");

	if (types & L1D_FLUSH_MTTRIG)
		pr_info("rfi-flush: Using mttrig type flush\n");

	enabled_flush_types = types;

	if (!no_rfi_flush)
		rfi_flush_enable(enable);
}

#ifdef CONFIG_DEBUG_FS
static int rfi_flush_set(void *data, u64 val)
{
	if (val == 1)
		rfi_flush_enable(true);
	else if (val == 0)
		rfi_flush_enable(false);
	else
		return -EINVAL;

	return 0;
}

static int rfi_flush_get(void *data, u64 *val)
{
	*val = rfi_flush ? 1 : 0;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");

static __init int rfi_flush_debugfs_init(void)
{
	debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
	return 0;
}
device_initcall(rfi_flush_debugfs_init);
#endif

ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
{
	if (rfi_flush)
		return sprintf(buf, "Mitigation: RFI Flush\n");

	return sprintf(buf, "Vulnerable\n");
}
#endif /* CONFIG_PPC_BOOK3S_64 */