1 /*- 2 * Copyright (c) 2015 Nathan Whitehorn 3 * Copyright (c) 2017-2018 Semihalf 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

/*
 * PowerNV ("non-virtualized" POWER8/POWER9) platform driver: the kernel runs
 * bare-metal on top of the OPAL (skiboot) firmware.  This file wires the
 * platform_if methods for memory discovery, timebase, SMP bring-up, NUMA
 * domain mapping and reset, all backed by OPAL calls and the flattened
 * device tree exported by the firmware.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/hid.h>
#include <machine/platformvar.h>
#include <machine/pmap.h>
#include <machine/rtas.h>
#include <machine/smp.h>
#include <machine/spr.h>
#include <machine/trap.h>

#include <dev/ofw/openfirm.h>
#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>
#include <machine/ofw_machdep.h>
#include <powerpc/aim/mmu_oea64.h>

#include "platform_if.h"
#include "opal.h"

#ifdef SMP
/* Set before starting an AP; the AP start trampoline picks up its pcpu here. */
extern void *ap_pcpu;
#endif

/* Optional hook other powernv code may install; run on each AP during init. */
void (*powernv_smp_ap_extra_init)(void);

static int powernv_probe(platform_t);
static int powernv_attach(platform_t);
void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz);
static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz);
static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);
static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);
static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);
static int powernv_smp_get_bsp(platform_t, struct cpuref *cpuref);
static void powernv_smp_ap_init(platform_t);
#ifdef SMP
static int powernv_smp_start_cpu(platform_t, struct pcpu *cpu);
static void powernv_smp_probe_threads(platform_t);
static struct cpu_group *powernv_smp_topo(platform_t plat);
#endif
static void powernv_reset(platform_t);
static void powernv_cpu_idle(sbintime_t sbt);
static int powernv_cpuref_init(void);
static int powernv_node_numa_domain(platform_t platform, phandle_t node);

static platform_method_t powernv_methods[] = {
	PLATFORMMETHOD(platform_probe, powernv_probe),
	PLATFORMMETHOD(platform_attach, powernv_attach),
	PLATFORMMETHOD(platform_mem_regions, powernv_mem_regions),
	PLATFORMMETHOD(platform_numa_mem_regions, powernv_numa_mem_regions),
	PLATFORMMETHOD(platform_timebase_freq, powernv_timebase_freq),

	PLATFORMMETHOD(platform_smp_ap_init, powernv_smp_ap_init),
	PLATFORMMETHOD(platform_smp_first_cpu, powernv_smp_first_cpu),
	PLATFORMMETHOD(platform_smp_next_cpu, powernv_smp_next_cpu),
	PLATFORMMETHOD(platform_smp_get_bsp, powernv_smp_get_bsp),
#ifdef SMP
	PLATFORMMETHOD(platform_smp_start_cpu, powernv_smp_start_cpu),
	PLATFORMMETHOD(platform_smp_probe_threads, powernv_smp_probe_threads),
	PLATFORMMETHOD(platform_smp_topo, powernv_smp_topo),
#endif
	PLATFORMMETHOD(platform_node_numa_domain, powernv_node_numa_domain),

	PLATFORMMETHOD(platform_reset, powernv_reset),
	{ 0, 0 }
};

static platform_def_t powernv_platform = {
	"powernv",
	powernv_methods,
	0
};

/* CPU table built by powernv_cpuref_init(); indexed by logical cpuid. */
static struct cpuref platform_cpuref[MAXCPU];
static int platform_cpuref_cnt;	/* number of valid entries above */
static int platform_cpuref_valid;	/* nonzero once the table is built */
/* Index into "ibm,associativity" that selects the NUMA domain cell. */
static int platform_associativity;

PLATFORM_DEF(powernv_platform);

/* PIR (hardware thread id) of the thread we booted on; identifies the BSP. */
static uint64_t powernv_boot_pir;

/*
 * Probe: this platform applies iff OPAL firmware is present.
 */
static int
powernv_probe(platform_t plat)
{
	if (opal_check() == 0)
		return (BUS_PROBE_SPECIFIC);

	return (ENXIO);
}

/*
 * Attach: reinitialize CPUs via OPAL for our endianness, record the boot
 * PIR, program LPID/LPCR/HFSCR for bare-metal (hypervisor) operation,
 * enumerate CPUs, and parse SLB size and supported segment/page sizes from
 * the device tree for the MOEA64 pmap.
 */
static int
powernv_attach(platform_t plat)
{
	uint32_t nptlp, shift = 0, slb_encoding = 0;
	int32_t lp_size, lp_encoding;
	char buf[255];
	pcell_t refpoints[3];
	pcell_t prop;
	phandle_t cpu;
	phandle_t opal;
	int res, len, idx;
	register_t msr;
	register_t fscr;
	bool has_lp;

	/* Ping OPAL again just to make sure */
	opal_check();

	/* Ask firmware to (re)set all CPUs to the kernel's byte order. */
#if BYTE_ORDER == LITTLE_ENDIAN
	opal_call(OPAL_REINIT_CPUS, 2 /* Little endian */);
#else
	opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */);
#endif
	opal = OF_finddevice("/ibm,opal");

	/*
	 * The reference-points property tells us which associativity level
	 * corresponds to a NUMA domain; fall back to skiboot's default.
	 */
	platform_associativity = 4; /* Skiboot default. */
	if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints,
	    sizeof(refpoints)) > 0) {
		platform_associativity = refpoints[0];
	}

	if (cpu_idle_hook == NULL)
		cpu_idle_hook = powernv_cpu_idle;

	powernv_boot_pir = mfspr(SPR_PIR);

	/* LPID must not be altered when PSL_DR or PSL_IR is set */
	msr = mfmsr();
	mtmsr(msr & ~(PSL_DR | PSL_IR));

	/* Direct interrupts to SRR instead of HSRR and reset LPCR otherwise */
	mtspr(SPR_LPID, 0);
	isync();

	/*
	 * NOTE(review): `lpcr` is a global not declared in this file —
	 * presumably exported by <powerpc/aim/mmu_oea64.h>; confirm.
	 */
	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
		lpcr |= LPCR_HVICE;	/* HV interrupt virtualization (ISA 3.0) */

#if BYTE_ORDER == LITTLE_ENDIAN
	lpcr |= LPCR_ILE;	/* take interrupts in little-endian mode */
#endif

	mtspr(SPR_LPCR, lpcr);
	isync();

	/* Enable facilities (TAR, EBB, BHRB, PMU, VSX, FP, msgsnd, DSCR). */
	fscr = mfspr(SPR_HFSCR);
	fscr |= FSCR_TAR | FSCR_EBB | HFSCR_BHRB | HFSCR_PM |
	    HFSCR_VECVSX | HFSCR_FP | FSCR_MSGP | FSCR_DSCR;
	mtspr(SPR_HFSCR, fscr);

	mtmsr(msr);	/* restore translation */

	powernv_cpuref_init();

	/* Set SLB count from device tree */
	cpu = OF_peer(0);
	cpu = OF_child(cpu);
	while (cpu != 0) {
		res = OF_getprop(cpu, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		cpu = OF_peer(cpu);
	}
	if (cpu == 0)
		goto out;

	/* First child with device_type "cpu"; all cores are assumed alike. */
	cpu = OF_child(cpu);
	while (cpu != 0) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0)
			break;
		cpu = OF_peer(cpu);
	}
	if (cpu == 0)
		goto out;

	res = OF_getencprop(cpu, "ibm,slb-size", &prop, sizeof(prop));
	if (res > 0)
		n_slbs = prop;

	/*
	 * Scan the large page size property for PAPR compatible machines.
	 * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
	 * for the encoding of the property.
	 */

	len = OF_getproplen(cpu, "ibm,segment-page-sizes");
	if (len > 0) {
		/*
		 * We have to use a variable length array on the stack
		 * since we have very limited stack space.
		 */
		pcell_t arr[len/sizeof(cell_t)];
		res = OF_getencprop(cpu, "ibm,segment-page-sizes", arr,
		    sizeof(arr));
		len /= 4;	/* bytes -> cells */
		idx = 0;
		has_lp = false;
		/*
		 * Property layout per segment size: { shift, slb-encoding,
		 * nptlp, nptlp x { lp-size, lp-encoding } }.
		 */
		while (len > 0) {
			shift = arr[idx];
			slb_encoding = arr[idx + 1];
			nptlp = arr[idx + 2];
			idx += 3;
			len -= 3;
			while (len > 0 && nptlp) {
				lp_size = arr[idx];
				lp_encoding = arr[idx+1];
				/* Standard large page: SLB[L]=1, PTE[LP]=0. */
				if (slb_encoding == SLBV_L && lp_encoding == 0)
					has_lp = true;

				/* 16M pages in a 4K base-page segment. */
				if (slb_encoding == SLB_PGSZ_4K_4K &&
				    lp_encoding == LP_4K_16M)
					moea64_has_lp_4k_16m = true;

				idx += 2;
				len -= 2;
				nptlp--;
			}
			if (has_lp && moea64_has_lp_4k_16m)
				break;
		}

		if (!has_lp)
			panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
			    "not supported by this system.");

		moea64_large_page_shift = shift;
		moea64_large_page_size = 1ULL << lp_size;
	}

out:
	return (0);
}

/*
 * Physical/available memory regions straight from the OFW device tree.
 */
void
powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz)
{

	ofw_mem_regions(phys, physsz, avail, availsz);
}

/*
 * Per-NUMA-domain memory regions from the OFW device tree.
 */
static void
powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz)
{

	ofw_numa_mem_regions(phys, physsz);
}

/*
 * Return the timebase frequency from the first cpu node's
 * "timebase-frequency" property; 512 MHz fallback if no cpu node exists.
 * Panics if the property is present but unusable.
 */
static u_long
powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)
{
	char buf[8];
	phandle_t cpu, dev, root;
	int res;
	int32_t ticks = -1;

	root = OF_peer(0);
	dev = OF_child(root);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0)
			break;
	}
	if (cpu == 0)
		return (512000000);

	OF_getencprop(cpu, "timebase-frequency", &ticks, sizeof(ticks));

	if (ticks <= 0)
		panic("Unable to determine timebase frequency!");

	return (ticks);

}

/*
 * Build platform_cpuref[] from the device tree: one entry per hardware
 * thread ("ibm,ppc-interrupt-server#s" cell) of every enabled cpu node.
 * The table is then rotated so the boot thread (matching powernv_boot_pir)
 * lands at logical CPUID 0.  Idempotent; runs once.
 */
static int
powernv_cpuref_init(void)
{
	phandle_t cpu, dev;
	char buf[32];
	int a, res, tmp_cpuref_cnt;
	static struct cpuref tmp_cpuref[MAXCPU];
	cell_t interrupt_servers[32];
	uint64_t bsp;

	if (platform_cpuref_valid)
		return (0);

	dev = OF_peer(0);
	dev = OF_child(dev);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	bsp = 0;
	tmp_cpuref_cnt = 0;
	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0) {
			/* Skip cores marked disabled/failed in the tree. */
			if (!ofw_bus_node_status_okay(cpu))
				continue;
			res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
			if (res > 0) {
				OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
				    interrupt_servers, res);

				/* One entry per SMT thread of this core. */
				for (a = 0; a < res/sizeof(cell_t); a++) {
					tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
					tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
					tmp_cpuref[tmp_cpuref_cnt].cr_domain =
					    powernv_node_numa_domain(NULL, cpu);
					if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)
						bsp = tmp_cpuref_cnt;

					tmp_cpuref_cnt++;
				}
			}
		}
	}

	/* Map IDs, so BSP has CPUID 0 regardless of hwref */
	for (a = bsp; a < tmp_cpuref_cnt; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
		platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
		platform_cpuref_cnt++;
	}
	for (a = 0; a < bsp; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
		platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
		platform_cpuref_cnt++;
	}

	platform_cpuref_valid = 1;

	return (0);
}

/*
 * Iterator start: hand back the first (BSP) entry of the CPU table.
 */
static int
powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
{
	if (platform_cpuref_valid == 0)
		return (EINVAL);

	cpuref->cr_cpuid = 0;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
	cpuref->cr_domain = platform_cpuref[0].cr_domain;

	return (0);
}

/*
 * Iterator step: advance to the next table entry; ENOENT past the end.
 */
static int
powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
{
	int id;

	if (platform_cpuref_valid == 0)
		return (EINVAL);

	id = cpuref->cr_cpuid + 1;
	if (id >= platform_cpuref_cnt)
		return (ENOENT);

	cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
	cpuref->cr_domain = platform_cpuref[id].cr_domain;

	return (0);
}

/*
 * The BSP is entry 0 by construction (see powernv_cpuref_init()).
 */
static int
powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
{

	cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
	cpuref->cr_domain = platform_cpuref[0].cr_domain;
	return (0);
}

#ifdef SMP
/*
 * Kick an AP via firmware: publish its pcpu pointer for the start
 * trampoline, then OPAL_START_CPU sends it to the system-reset vector.
 */
static int
powernv_smp_start_cpu(platform_t plat, struct pcpu *pc)
{
	int result;

	ap_pcpu = pc;
	powerpc_sync();	/* ensure ap_pcpu is visible before the AP runs */

	result = opal_call(OPAL_START_CPU, pc->pc_hwref, EXC_RST);
	if (result != OPAL_SUCCESS) {
		printf("OPAL error (%d): unable to start AP %d\n",
		    result, (int)pc->pc_hwref);
		return (ENXIO);
	}

	return (0);
}

/*
 * Derive SMT threads-per-core from the length of the first cpu node's
 * "ibm,ppc-interrupt-server#s" property (one cell per thread), and set
 * mp_ncores when the CPU count divides evenly.
 */
static void
powernv_smp_probe_threads(platform_t plat)
{
	char buf[8];
	phandle_t cpu, dev, root;
	int res, nthreads;

	root = OF_peer(0);

	dev = OF_child(root);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	nthreads = 1;
	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res <= 0 || strcmp(buf, "cpu") != 0)
			continue;

		res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");

		if (res >= 0)
			nthreads = res / sizeof(cell_t);
		else
			nthreads = 1;
		break;	/* first cpu node is taken as representative */
	}

	smp_threads_per_core = nthreads;
	if (mp_ncpus % nthreads == 0)
		mp_ncores = mp_ncpus / nthreads;
}

/*
 * Fill in one scheduler topology group and allocate its child array.
 * Returns the (possibly NULL) child array for the caller to populate.
 */
static struct cpu_group *
cpu_group_init(struct cpu_group *group, struct cpu_group *parent,
    const cpuset_t *cpus, int children, int level, int flags)
{
	struct cpu_group *child;

	child = children != 0 ? smp_topo_alloc(children) : NULL;

	group->cg_parent = parent;
	group->cg_child = child;
	CPU_COPY(cpus, &group->cg_mask);
	group->cg_count = CPU_COUNT(cpus);
	group->cg_children = children;
	group->cg_level = level;
	group->cg_flags = flags;

	return (child);
}

/*
 * Build the scheduler topology: root -> NUMA domain -> core (L3) ->
 * SMT thread group (L1).  Falls back to a flat topology when the CPU
 * count is not a multiple of the thread count.
 */
static struct cpu_group *
powernv_smp_topo(platform_t plat)
{
	struct cpu_group *core, *dom, *root;
	cpuset_t corecpus, domcpus;
	int cpuid, i, j, k, ncores;

	if (mp_ncpus % smp_threads_per_core != 0) {
		printf("%s: irregular SMP topology (%d threads, %d per core)\n",
		    __func__, mp_ncpus, smp_threads_per_core);
		return (smp_topo_none());
	}

	root = smp_topo_alloc(1);
	dom = cpu_group_init(root, NULL, &all_cpus, vm_ndomains, CG_SHARE_NONE,
	    0);

	/*
	 * Redundant layers will be collapsed by the caller so we don't need a
	 * special case for a single domain.
	 */
	for (i = 0; i < vm_ndomains; i++, dom++) {
		CPU_COPY(&cpuset_domain[i], &domcpus);
		ncores = CPU_COUNT(&domcpus) / smp_threads_per_core;
		KASSERT(CPU_COUNT(&domcpus) % smp_threads_per_core == 0,
		    ("%s: domain %d core count not divisible by thread count",
		    __func__, i));

		core = cpu_group_init(dom, root, &domcpus, ncores, CG_SHARE_L3,
		    0);
		for (j = 0; j < ncores; j++, core++) {
			/*
			 * Assume that consecutive CPU IDs correspond to sibling
			 * threads.
			 */
			CPU_ZERO(&corecpus);
			for (k = 0; k < smp_threads_per_core; k++) {
				cpuid = CPU_FFS(&domcpus) - 1;
				CPU_CLR(cpuid, &domcpus);
				CPU_SET(cpuid, &corecpus);
			}
			(void)cpu_group_init(core, dom, &corecpus, 0,
			    CG_SHARE_L1, CG_FLAG_SMT);
		}
	}

	return (root);
}

#endif

/*
 * Reboot the machine via firmware.
 */
static void
powernv_reset(platform_t platform)
{

	opal_call(OPAL_CEC_REBOOT);
}

/*
 * Per-AP init hook; runs any extra initializer registered by other
 * powernv components (see powernv_smp_ap_extra_init above).
 */
static void
powernv_smp_ap_init(platform_t platform)
{

	if (powernv_smp_ap_extra_init != NULL)
		powernv_smp_ap_extra_init();
}

/* Idle hook: intentionally a no-op (spin). */
static void
powernv_cpu_idle(sbintime_t sbt)
{
}

/*
 * Map a device-tree node to a VM NUMA domain using its
 * "ibm,associativity" property.  The domain cell (selected by
 * platform_associativity) is interned into numa_domains[], whose index
 * becomes the FreeBSD domain number.  Walks up to the parent node when
 * the property is missing or too short; returns domain 0 when NUMA is
 * disabled (compile-time or via the vm.numa.disabled tunable) or when
 * the table overflows.
 */
static int
powernv_node_numa_domain(platform_t platform, phandle_t node)
{
	/* XXX: Is locking necessary in here? */
	static int numa_domains[MAXMEMDOM];
	static int numa_max_domain;
	cell_t associativity[5];
	int i, res;

#ifndef NUMA
	return (0);
#endif
	i = 0;
	TUNABLE_INT_FETCH("vm.numa.disabled", &i);
	if (i)
		return (0);

	res = OF_getencprop(node, "ibm,associativity",
	    associativity, sizeof(associativity));

	/*
	 * If this node doesn't have associativity, or if there are not
	 * enough elements in it, check its parent.
	 */
	if (res < (int)(sizeof(cell_t) * (platform_associativity + 1))) {
		node = OF_parent(node);
		/* If already at the root, use default domain. */
		if (node == 0)
			return (0);
		return (powernv_node_numa_domain(platform, node));
	}

	/* Already seen this domain value?  Reuse its index. */
	for (i = 0; i < numa_max_domain; i++) {
		if (numa_domains[i] == associativity[platform_associativity])
			return (i);
	}
	if (i < MAXMEMDOM)
		numa_domains[numa_max_domain++] =
		    associativity[platform_associativity];
	else
		i = 0;	/* table full: collapse into domain 0 */

	return (i);
}

/* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */
static void
powernv_setup_nmmu(void *unused)
{
	if (opal_check() != 0)
		return;
	/* Share the partition table (PTCR) with the Nest MMU via OPAL. */
	opal_call(OPAL_NMMU_SET_PTCR, -1, mfspr(SPR_PTCR));
}

SYSINIT(powernv_setup_nmmu, SI_SUB_CPU, SI_ORDER_ANY, powernv_setup_nmmu, NULL);