1 /* 2 * PowerNV setup code. 3 * 4 * Copyright 2011 IBM Corp. 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 12 #undef DEBUG 13 14 #include <linux/cpu.h> 15 #include <linux/errno.h> 16 #include <linux/sched.h> 17 #include <linux/kernel.h> 18 #include <linux/tty.h> 19 #include <linux/reboot.h> 20 #include <linux/init.h> 21 #include <linux/console.h> 22 #include <linux/delay.h> 23 #include <linux/irq.h> 24 #include <linux/seq_file.h> 25 #include <linux/of.h> 26 #include <linux/of_fdt.h> 27 #include <linux/interrupt.h> 28 #include <linux/bug.h> 29 #include <linux/pci.h> 30 #include <linux/cpufreq.h> 31 32 #include <asm/machdep.h> 33 #include <asm/firmware.h> 34 #include <asm/xics.h> 35 #include <asm/xive.h> 36 #include <asm/opal.h> 37 #include <asm/kexec.h> 38 #include <asm/smp.h> 39 #include <asm/tm.h> 40 #include <asm/setup.h> 41 42 #include "powernv.h" 43 44 static void pnv_setup_rfi_flush(void) 45 { 46 struct device_node *np, *fw_features; 47 enum l1d_flush_type type; 48 int enable; 49 50 /* Default to fallback in case fw-features are not available */ 51 type = L1D_FLUSH_FALLBACK; 52 enable = 1; 53 54 np = of_find_node_by_name(NULL, "ibm,opal"); 55 fw_features = of_get_child_by_name(np, "fw-features"); 56 of_node_put(np); 57 58 if (fw_features) { 59 np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); 60 if (np && of_property_read_bool(np, "enabled")) 61 type = L1D_FLUSH_MTTRIG; 62 63 of_node_put(np); 64 65 np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); 66 if (np && of_property_read_bool(np, "enabled")) 67 type = L1D_FLUSH_ORI; 68 69 of_node_put(np); 70 71 /* Enable unless firmware says NOT to */ 72 enable = 2; 73 np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); 74 if (np && of_property_read_bool(np, "disabled")) 75 enable--; 76 77 of_node_put(np); 78 79 np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); 80 if (np && of_property_read_bool(np, "disabled")) 81 enable--; 82 83 np = of_get_child_by_name(fw_features, "speculation-policy-favor-security"); 84 if (np && of_property_read_bool(np, "disabled")) 85 enable = 0; 86 87 of_node_put(np); 88 of_node_put(fw_features); 89 } 90 91 setup_rfi_flush(type, enable > 0); 92 } 93 94 static void __init pnv_setup_arch(void) 95 { 96 set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); 97 98 pnv_setup_rfi_flush(); 99 100 /* Initialize SMP */ 101 pnv_smp_init(); 102 103 /* Setup PCI */ 104 pnv_pci_init(); 105 106 /* Setup RTC and NVRAM callbacks */ 107 if (firmware_has_feature(FW_FEATURE_OPAL)) 108 opal_nvram_init(); 109 110 /* Enable NAP mode */ 111 powersave_nap = 1; 112 113 /* XXX PMCS */ 114 } 115 116 static void __init pnv_init(void) 117 { 118 /* 119 * Initialize the LPC bus now so that legacy serial 120 * ports can be found on it 121 */ 122 opal_lpc_init(); 123 124 #ifdef CONFIG_HVC_OPAL 125 if (firmware_has_feature(FW_FEATURE_OPAL)) 126 hvc_opal_init_early(); 127 else 128 #endif 129 add_preferred_console("hvc", 0, NULL); 130 } 131 132 static void __init pnv_init_IRQ(void) 133 { 134 /* Try using a XIVE if available, otherwise use a XICS */ 135 if (!xive_native_init()) 136 xics_init(); 137 138 WARN_ON(!ppc_md.get_irq); 139 } 140 141 static void pnv_show_cpuinfo(struct seq_file *m) 142 { 143 struct device_node *root; 144 const char *model = ""; 145 146 root = of_find_node_by_path("/"); 147 if (root) 148 model = of_get_property(root, "model", NULL); 149 seq_printf(m, "machine\t\t: PowerNV %s\n", model); 150 if (firmware_has_feature(FW_FEATURE_OPAL)) 151 seq_printf(m, "firmware\t: OPAL\n"); 152 else 153 seq_printf(m, "firmware\t: BML\n"); 154 of_node_put(root); 155 if (radix_enabled()) 156 seq_printf(m, "MMU\t\t: Radix\n"); 157 else 158 seq_printf(m, "MMU\t\t: Hash\n"); 159 } 160 161 static void pnv_prepare_going_down(void) 162 { 163 /* 164 * Disable all notifiers from OPAL, we can't 165 * service interrupts anymore anyway 166 */ 167 opal_event_shutdown(); 168 169 /* Soft disable interrupts */ 170 local_irq_disable(); 171 172 /* 173 * Return secondary CPUs to firwmare if a flash update 174 * is pending otherwise we will get all sort of error 175 * messages about CPU being stuck etc.. This will also 176 * have the side effect of hard disabling interrupts so 177 * past this point, the kernel is effectively dead. 178 */ 179 opal_flash_term_callback(); 180 } 181 182 static void __noreturn pnv_restart(char *cmd) 183 { 184 long rc = OPAL_BUSY; 185 186 pnv_prepare_going_down(); 187 188 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 189 rc = opal_cec_reboot(); 190 if (rc == OPAL_BUSY_EVENT) 191 opal_poll_events(NULL); 192 else 193 mdelay(10); 194 } 195 for (;;) 196 opal_poll_events(NULL); 197 } 198 199 static void __noreturn pnv_power_off(void) 200 { 201 long rc = OPAL_BUSY; 202 203 pnv_prepare_going_down(); 204 205 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 206 rc = opal_cec_power_down(0); 207 if (rc == OPAL_BUSY_EVENT) 208 opal_poll_events(NULL); 209 else 210 mdelay(10); 211 } 212 for (;;) 213 opal_poll_events(NULL); 214 } 215 216 static void __noreturn pnv_halt(void) 217 { 218 pnv_power_off(); 219 } 220 221 static void pnv_progress(char *s, unsigned short hex) 222 { 223 } 224 225 static void pnv_shutdown(void) 226 { 227 /* Let the PCI code clear up IODA tables */ 228 pnv_pci_shutdown(); 229 230 /* 231 * Stop OPAL activity: Unregister all OPAL interrupts so they 232 * don't fire up while we kexec and make sure all potentially 233 * DMA'ing ops are complete (such as dump retrieval). 234 */ 235 opal_shutdown(); 236 } 237 238 #ifdef CONFIG_KEXEC_CORE 239 static void pnv_kexec_wait_secondaries_down(void) 240 { 241 int my_cpu, i, notified = -1; 242 243 my_cpu = get_cpu(); 244 245 for_each_online_cpu(i) { 246 uint8_t status; 247 int64_t rc, timeout = 1000; 248 249 if (i == my_cpu) 250 continue; 251 252 for (;;) { 253 rc = opal_query_cpu_status(get_hard_smp_processor_id(i), 254 &status); 255 if (rc != OPAL_SUCCESS || status != OPAL_THREAD_STARTED) 256 break; 257 barrier(); 258 if (i != notified) { 259 printk(KERN_INFO "kexec: waiting for cpu %d " 260 "(physical %d) to enter OPAL\n", 261 i, paca[i].hw_cpu_id); 262 notified = i; 263 } 264 265 /* 266 * On crash secondaries might be unreachable or hung, 267 * so timeout if we've waited too long 268 * */ 269 mdelay(1); 270 if (timeout-- == 0) { 271 printk(KERN_ERR "kexec: timed out waiting for " 272 "cpu %d (physical %d) to enter OPAL\n", 273 i, paca[i].hw_cpu_id); 274 break; 275 } 276 } 277 } 278 } 279 280 static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) 281 { 282 u64 reinit_flags; 283 284 if (xive_enabled()) 285 xive_kexec_teardown_cpu(secondary); 286 else 287 xics_kexec_teardown_cpu(secondary); 288 289 /* On OPAL, we return all CPUs to firmware */ 290 if (!firmware_has_feature(FW_FEATURE_OPAL)) 291 return; 292 293 if (secondary) { 294 /* Return secondary CPUs to firmware on OPAL v3 */ 295 mb(); 296 get_paca()->kexec_state = KEXEC_STATE_REAL_MODE; 297 mb(); 298 299 /* Return the CPU to OPAL */ 300 opal_return_cpu(); 301 } else { 302 /* Primary waits for the secondaries to have reached OPAL */ 303 pnv_kexec_wait_secondaries_down(); 304 305 /* Switch XIVE back to emulation mode */ 306 if (xive_enabled()) 307 xive_shutdown(); 308 309 /* 310 * We might be running as little-endian - now that interrupts 311 * are disabled, reset the HILE bit to big-endian so we don't 312 * take interrupts in the wrong endian later 313 * 314 * We reinit to enable both radix and hash on P9 to ensure 315 * the mode used by the next kernel is always supported. 316 */ 317 reinit_flags = OPAL_REINIT_CPUS_HILE_BE; 318 if (cpu_has_feature(CPU_FTR_ARCH_300)) 319 reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX | 320 OPAL_REINIT_CPUS_MMU_HASH; 321 opal_reinit_cpus(reinit_flags); 322 } 323 } 324 #endif /* CONFIG_KEXEC_CORE */ 325 326 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 327 static unsigned long pnv_memory_block_size(void) 328 { 329 /* 330 * We map the kernel linear region with 1GB large pages on radix. For 331 * memory hot unplug to work our memory block size must be at least 332 * this size. 333 */ 334 if (radix_enabled()) 335 return 1UL * 1024 * 1024 * 1024; 336 else 337 return 256UL * 1024 * 1024; 338 } 339 #endif 340 341 static void __init pnv_setup_machdep_opal(void) 342 { 343 ppc_md.get_boot_time = opal_get_boot_time; 344 ppc_md.restart = pnv_restart; 345 pm_power_off = pnv_power_off; 346 ppc_md.halt = pnv_halt; 347 /* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */ 348 ppc_md.machine_check_exception = opal_machine_check; 349 ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery; 350 ppc_md.hmi_exception_early = opal_hmi_exception_early; 351 ppc_md.handle_hmi_exception = opal_handle_hmi_exception; 352 } 353 354 static int __init pnv_probe(void) 355 { 356 if (!of_machine_is_compatible("ibm,powernv")) 357 return 0; 358 359 if (firmware_has_feature(FW_FEATURE_OPAL)) 360 pnv_setup_machdep_opal(); 361 362 pr_debug("PowerNV detected !\n"); 363 364 pnv_init(); 365 366 return 1; 367 } 368 369 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 370 void __init pnv_tm_init(void) 371 { 372 if (!firmware_has_feature(FW_FEATURE_OPAL) || 373 !pvr_version_is(PVR_POWER9) || 374 early_cpu_has_feature(CPU_FTR_TM)) 375 return; 376 377 if (opal_reinit_cpus(OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) != OPAL_SUCCESS) 378 return; 379 380 pr_info("Enabling TM (Transactional Memory) with Suspend Disabled\n"); 381 cur_cpu_spec->cpu_features |= CPU_FTR_TM; 382 /* Make sure "normal" HTM is off (it should be) */ 383 cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_HTM; 384 /* Turn on no suspend mode, and HTM no SC */ 385 cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NO_SUSPEND | \ 386 PPC_FEATURE2_HTM_NOSC; 387 tm_suspend_disabled = true; 388 } 389 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ 390 391 /* 392 * Returns the cpu frequency for 'cpu' in Hz. This is used by 393 * /proc/cpuinfo 394 */ 395 static unsigned long pnv_get_proc_freq(unsigned int cpu) 396 { 397 unsigned long ret_freq; 398 399 ret_freq = cpufreq_get(cpu) * 1000ul; 400 401 /* 402 * If the backend cpufreq driver does not exist, 403 * then fallback to old way of reporting the clockrate. 404 */ 405 if (!ret_freq) 406 ret_freq = ppc_proc_freq; 407 return ret_freq; 408 } 409 410 define_machine(powernv) { 411 .name = "PowerNV", 412 .probe = pnv_probe, 413 .setup_arch = pnv_setup_arch, 414 .init_IRQ = pnv_init_IRQ, 415 .show_cpuinfo = pnv_show_cpuinfo, 416 .get_proc_freq = pnv_get_proc_freq, 417 .progress = pnv_progress, 418 .machine_shutdown = pnv_shutdown, 419 .power_save = NULL, 420 .calibrate_decr = generic_calibrate_decr, 421 #ifdef CONFIG_KEXEC_CORE 422 .kexec_cpu_down = pnv_kexec_cpu_down, 423 #endif 424 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE 425 .memory_block_size = pnv_memory_block_size, 426 #endif 427 }; 428