/*-
 * Copyright (c) 2015 Nathan Whitehorn
 * Copyright (c) 2017-2018 Semihalf
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/hid.h>
#include <machine/platformvar.h>
#include <machine/pmap.h>
#include <machine/rtas.h>
#include <machine/smp.h>
#include <machine/spr.h>
#include <machine/trap.h>

#include <dev/ofw/openfirm.h>
#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>
#include <machine/ofw_machdep.h>
#include <powerpc/aim/mmu_oea64.h>

#include "platform_if.h"
#include "opal.h"

#ifdef SMP
extern void *ap_pcpu;
#endif

void (*powernv_smp_ap_extra_init)(void);

static int powernv_probe(platform_t);
static int powernv_attach(platform_t);
void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz);
static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz);
static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);
static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);
static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);
static int powernv_smp_get_bsp(platform_t, struct cpuref *cpuref);
static void powernv_smp_ap_init(platform_t);
#ifdef SMP
static int powernv_smp_start_cpu(platform_t, struct pcpu *cpu);
static void powernv_smp_probe_threads(platform_t);
static struct cpu_group *powernv_smp_topo(platform_t plat);
#endif
static void powernv_reset(platform_t);
static void powernv_cpu_idle(sbintime_t sbt);
static int powernv_cpuref_init(void);
static int powernv_node_numa_domain(platform_t platform, phandle_t node);

static platform_method_t powernv_methods[] = {
	PLATFORMMETHOD(platform_probe,		powernv_probe),
	PLATFORMMETHOD(platform_attach,		powernv_attach),
	PLATFORMMETHOD(platform_mem_regions,	powernv_mem_regions),
	PLATFORMMETHOD(platform_numa_mem_regions,	powernv_numa_mem_regions),
	PLATFORMMETHOD(platform_timebase_freq,	powernv_timebase_freq),

	PLATFORMMETHOD(platform_smp_ap_init,	powernv_smp_ap_init),
	PLATFORMMETHOD(platform_smp_first_cpu,	powernv_smp_first_cpu),
	PLATFORMMETHOD(platform_smp_next_cpu,	powernv_smp_next_cpu),
	PLATFORMMETHOD(platform_smp_get_bsp,	powernv_smp_get_bsp),
#ifdef SMP
	PLATFORMMETHOD(platform_smp_start_cpu,	powernv_smp_start_cpu),
	PLATFORMMETHOD(platform_smp_probe_threads,	powernv_smp_probe_threads),
	PLATFORMMETHOD(platform_smp_topo,	powernv_smp_topo),
#endif
	PLATFORMMETHOD(platform_node_numa_domain,	powernv_node_numa_domain),

	PLATFORMMETHOD(platform_reset,		powernv_reset),
	{ 0, 0 }
};

static platform_def_t powernv_platform = {
	"powernv",
	powernv_methods,
	0
};

static struct cpuref platform_cpuref[MAXCPU];
static int platform_cpuref_cnt;
static int platform_cpuref_valid;
static int platform_associativity;

PLATFORM_DEF(powernv_platform);

static uint64_t powernv_boot_pir;

static int
powernv_probe(platform_t plat)
{
	if (opal_check() == 0)
		return (BUS_PROBE_SPECIFIC);

	return (ENXIO);
}

static int
powernv_attach(platform_t plat)
{
	uint32_t nptlp, shift = 0, slb_encoding = 0;
	int32_t lp_size, lp_encoding;
	char buf[255];
	pcell_t refpoints[3];
	pcell_t prop;
	phandle_t cpu;
	phandle_t opal;
	int res, len, idx;
	register_t msr;

	/* Ping OPAL again just to make sure */
	opal_check();

#if BYTE_ORDER == LITTLE_ENDIAN
	opal_call(OPAL_REINIT_CPUS, 2 /* Little endian */);
#else
	opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */);
#endif
	opal = OF_finddevice("/ibm,opal");

	platform_associativity = 4; /* Skiboot default. */
	if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints,
	    sizeof(refpoints)) > 0) {
		platform_associativity = refpoints[0];
	}

	if (cpu_idle_hook == NULL)
		cpu_idle_hook = powernv_cpu_idle;

	powernv_boot_pir = mfspr(SPR_PIR);

	/* LPID must not be altered when PSL_DR or PSL_IR is set */
	msr = mfmsr();
	mtmsr(msr & ~(PSL_DR | PSL_IR));

	/* Direct interrupts to SRR instead of HSRR and reset LPCR otherwise */
	mtspr(SPR_LPID, 0);
	isync();

	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
		lpcr |= LPCR_HVICE;

#if BYTE_ORDER == LITTLE_ENDIAN
	lpcr |= LPCR_ILE;
#endif

	mtspr(SPR_LPCR, lpcr);
	isync();

	mtmsr(msr);

	powernv_cpuref_init();

	/* Set SLB count from device tree */
	cpu = OF_peer(0);
	cpu = OF_child(cpu);
	while (cpu != 0) {
		res = OF_getprop(cpu, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		cpu = OF_peer(cpu);
	}
	if (cpu == 0)
		goto out;

	cpu = OF_child(cpu);
	while (cpu != 0) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0)
			break;
		cpu = OF_peer(cpu);
	}
	if (cpu == 0)
		goto out;

	res = OF_getencprop(cpu, "ibm,slb-size", &prop, sizeof(prop));
	if (res > 0)
		n_slbs = prop;

	/*
	 * Scan the large page size property for PAPR compatible machines.
	 * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
	 * for the encoding of the property.
	 */

	len = OF_getproplen(cpu, "ibm,segment-page-sizes");
	if (len > 0) {
		/*
		 * We have to use a variable length array on the stack
		 * since we have very limited stack space.
		 */
		pcell_t arr[len/sizeof(cell_t)];
		res = OF_getencprop(cpu, "ibm,segment-page-sizes", arr,
		    sizeof(arr));
		len /= 4;
		idx = 0;
		while (len > 0) {
			shift = arr[idx];
			slb_encoding = arr[idx + 1];
			nptlp = arr[idx + 2];
			idx += 3;
			len -= 3;
			while (len > 0 && nptlp) {
				lp_size = arr[idx];
				lp_encoding = arr[idx+1];
				if (slb_encoding == SLBV_L && lp_encoding == 0)
					break;

				idx += 2;
				len -= 2;
				nptlp--;
			}
			if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0)
				break;
		}

		if (len == 0)
			panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
			    "not supported by this system.");

		moea64_large_page_shift = shift;
		moea64_large_page_size = 1ULL << lp_size;
	}

out:
	return (0);
}

void
powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz)
{

	ofw_mem_regions(phys, physsz, avail, availsz);
}

static void
powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz)
{

	ofw_numa_mem_regions(phys, physsz);
}

static u_long
powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)
{
	char buf[8];
	phandle_t cpu, dev, root;
	int res;
	int32_t ticks = -1;

	root = OF_peer(0);
	dev = OF_child(root);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0)
			break;
	}
	if (cpu == 0)
		return (512000000);

	OF_getencprop(cpu, "timebase-frequency", &ticks, sizeof(ticks));

	if (ticks <= 0)
		panic("Unable to determine timebase frequency!");

	return (ticks);
}

static int
powernv_cpuref_init(void)
{
	phandle_t cpu, dev;
	char buf[32];
	int a, res, tmp_cpuref_cnt;
	static struct cpuref tmp_cpuref[MAXCPU];
	cell_t interrupt_servers[32];
	uint64_t bsp;

	if (platform_cpuref_valid)
		return (0);

	dev = OF_peer(0);
	dev = OF_child(dev);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	bsp = 0;
	tmp_cpuref_cnt = 0;
	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0) {
			if (!ofw_bus_node_status_okay(cpu))
				continue;
			res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
			if (res > 0) {
				OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
				    interrupt_servers, res);

				for (a = 0; a < res/sizeof(cell_t); a++) {
					tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
					tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
					tmp_cpuref[tmp_cpuref_cnt].cr_domain =
					    powernv_node_numa_domain(NULL, cpu);
					if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)
						bsp = tmp_cpuref_cnt;

					tmp_cpuref_cnt++;
				}
			}
		}
	}

	/* Map IDs, so BSP has CPUID 0 regardless of hwref */
	for (a = bsp; a < tmp_cpuref_cnt; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
		platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
		platform_cpuref_cnt++;
	}
	for (a = 0; a < bsp; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
		platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
		platform_cpuref_cnt++;
	}

	platform_cpuref_valid = 1;

	return (0);
}

static int
powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
{
	if (platform_cpuref_valid == 0)
		return (EINVAL);

	cpuref->cr_cpuid = 0;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
	cpuref->cr_domain = platform_cpuref[0].cr_domain;

	return (0);
}

static int
powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
{
	int id;

	if (platform_cpuref_valid == 0)
		return (EINVAL);

	id = cpuref->cr_cpuid + 1;
	if (id >= platform_cpuref_cnt)
		return (ENOENT);

	cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
	cpuref->cr_domain = platform_cpuref[id].cr_domain;

	return (0);
}

static int
powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
{

	cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
	cpuref->cr_domain = platform_cpuref[0].cr_domain;
	return (0);
}

#ifdef SMP
static int
powernv_smp_start_cpu(platform_t plat, struct pcpu *pc)
{
	int result;

	ap_pcpu = pc;
	powerpc_sync();

	result = opal_call(OPAL_START_CPU, pc->pc_hwref, EXC_RST);
	if (result != OPAL_SUCCESS) {
		printf("OPAL error (%d): unable to start AP %d\n",
		    result, (int)pc->pc_hwref);
		return (ENXIO);
	}

	return (0);
}

static void
powernv_smp_probe_threads(platform_t plat)
{
	char buf[8];
	phandle_t cpu, dev, root;
	int res, nthreads;

	root = OF_peer(0);

	dev = OF_child(root);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	nthreads = 1;
	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res <= 0 || strcmp(buf, "cpu") != 0)
			continue;

		res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");

		if (res >= 0)
			nthreads = res / sizeof(cell_t);
		else
			nthreads = 1;
		break;
	}

	smp_threads_per_core = nthreads;
	if (mp_ncpus % nthreads == 0)
		mp_ncores = mp_ncpus / nthreads;
}

static struct cpu_group *
powernv_smp_topo(platform_t plat)
{
	if (mp_ncpus % smp_threads_per_core != 0) {
		printf("WARNING: Irregular SMP topology. Performance may be "
		    "suboptimal (%d threads, %d on first core)\n",
		    mp_ncpus, smp_threads_per_core);
		return (smp_topo_none());
	}

	/* Don't do anything fancier for non-threaded SMP */
	if (smp_threads_per_core == 1)
		return (smp_topo_none());

	return (smp_topo_1level(CG_SHARE_L1, smp_threads_per_core,
	    CG_FLAG_SMT));
}

#endif

static void
powernv_reset(platform_t platform)
{

	opal_call(OPAL_CEC_REBOOT);
}

static void
powernv_smp_ap_init(platform_t platform)
{

	if (powernv_smp_ap_extra_init != NULL)
		powernv_smp_ap_extra_init();
}

static void
powernv_cpu_idle(sbintime_t sbt)
{
}

static int
powernv_node_numa_domain(platform_t platform, phandle_t node)
{
	/* XXX: Is locking necessary in here? */
	static int numa_domains[MAXMEMDOM];
	static int numa_max_domain;
	cell_t associativity[5];
	int i, res;

#ifndef NUMA
	return (0);
#endif
	if (vm_ndomains == 1)
		return (0);

	res = OF_getencprop(node, "ibm,associativity",
	    associativity, sizeof(associativity));

	/*
	 * If this node doesn't have associativity, or if there are not
	 * enough elements in it, check its parent.
	 */
	if (res < (int)(sizeof(cell_t) * (platform_associativity + 1))) {
		node = OF_parent(node);
		/* If already at the root, use default domain. */
		if (node == 0)
			return (0);
		return (powernv_node_numa_domain(platform, node));
	}

	for (i = 0; i < numa_max_domain; i++) {
		if (numa_domains[i] == associativity[platform_associativity])
			return (i);
	}
	if (i < MAXMEMDOM)
		numa_domains[numa_max_domain++] =
		    associativity[platform_associativity];
	else
		i = 0;

	return (i);
}

/* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */
static void
powernv_setup_nmmu(void *unused)
{
	if (opal_check() != 0)
		return;
	opal_call(OPAL_NMMU_SET_PTCR, -1, mfspr(SPR_PTCR));
}

SYSINIT(powernv_setup_nmmu, SI_SUB_CPU, SI_ORDER_ANY, powernv_setup_nmmu, NULL);