/*-
 * Copyright (c) 2015 Nathan Whitehorn
 * Copyright (c) 2017-2018 Semihalf
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * PowerNV platform driver: bare-metal POWER systems running under the OPAL
 * (skiboot) firmware.  Implements the platform_if methods: memory/NUMA
 * region discovery, timebase frequency lookup, CPU enumeration and AP
 * startup via OPAL, SMP topology construction, and reboot.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/hid.h>
#include <machine/platformvar.h>
#include <machine/pmap.h>
#include <machine/rtas.h>
#include <machine/smp.h>
#include <machine/spr.h>
#include <machine/trap.h>

#include <dev/ofw/openfirm.h>
#include <dev/ofw/ofw_bus.h>
#include <dev/ofw/ofw_bus_subr.h>
#include <machine/ofw_machdep.h>
#include <powerpc/aim/mmu_oea64.h>

#include "platform_if.h"
#include "opal.h"

#ifdef SMP
/* Scratch pointer the AP startup trampoline reads to find its pcpu. */
extern void *ap_pcpu;
#endif

/* Optional hook run on each AP during platform_smp_ap_init (e.g. by xive). */
void (*powernv_smp_ap_extra_init)(void);

static int powernv_probe(platform_t);
static int powernv_attach(platform_t);
void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz);
static void powernv_numa_mem_regions(platform_t plat,
    struct numa_mem_region *phys, int *physsz);
static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);
static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);
static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);
static int powernv_smp_get_bsp(platform_t, struct cpuref *cpuref);
static void powernv_smp_ap_init(platform_t);
#ifdef SMP
static int powernv_smp_start_cpu(platform_t, struct pcpu *cpu);
static void powernv_smp_probe_threads(platform_t);
static struct cpu_group *powernv_smp_topo(platform_t plat);
#endif
static void powernv_reset(platform_t);
static void powernv_cpu_idle(sbintime_t sbt);
static int powernv_cpuref_init(void);
static int powernv_node_numa_domain(platform_t platform, phandle_t node);

/* platform_if method dispatch table. */
static platform_method_t powernv_methods[] = {
	PLATFORMMETHOD(platform_probe,		powernv_probe),
	PLATFORMMETHOD(platform_attach,		powernv_attach),
	PLATFORMMETHOD(platform_mem_regions,	powernv_mem_regions),
	PLATFORMMETHOD(platform_numa_mem_regions, powernv_numa_mem_regions),
	PLATFORMMETHOD(platform_timebase_freq,	powernv_timebase_freq),

	PLATFORMMETHOD(platform_smp_ap_init,	powernv_smp_ap_init),
	PLATFORMMETHOD(platform_smp_first_cpu,	powernv_smp_first_cpu),
	PLATFORMMETHOD(platform_smp_next_cpu,	powernv_smp_next_cpu),
	PLATFORMMETHOD(platform_smp_get_bsp,	powernv_smp_get_bsp),
#ifdef SMP
	PLATFORMMETHOD(platform_smp_start_cpu,	powernv_smp_start_cpu),
	PLATFORMMETHOD(platform_smp_probe_threads, powernv_smp_probe_threads),
	PLATFORMMETHOD(platform_smp_topo,	powernv_smp_topo),
#endif
	PLATFORMMETHOD(platform_node_numa_domain, powernv_node_numa_domain),

	PLATFORMMETHOD(platform_reset,		powernv_reset),
	{ 0, 0 }
};

static platform_def_t powernv_platform = {
	"powernv",
	powernv_methods,
	0
};

/*
 * CPU table built by powernv_cpuref_init(): one entry per hardware thread,
 * rotated so that the boot CPU is always cr_cpuid 0.
 */
static struct cpuref platform_cpuref[MAXCPU];
static int platform_cpuref_cnt;
static int platform_cpuref_valid;
/* Index into "ibm,associativity" used to derive a NUMA domain id. */
static int platform_associativity;

PLATFORM_DEF(powernv_platform);

/* PIR (hardware thread id) of the CPU we booted on; used to find the BSP. */
static uint64_t powernv_boot_pir;

/*
 * Probe: this platform applies only when OPAL firmware is detected.
 */
static int
powernv_probe(platform_t plat)
{
	if (opal_check() == 0)
		return (BUS_PROBE_SPECIFIC);

	return (ENXIO);
}

/*
 * Attach: re-init CPUs in the kernel's endianness via OPAL, read the
 * associativity reference point, program LPID/LPCR, enumerate CPUs, and
 * pull SLB size and large-page geometry from the device tree for the
 * oea64 pmap.
 */
static int
powernv_attach(platform_t plat)
{
	uint32_t nptlp, shift = 0, slb_encoding = 0;
	int32_t lp_size, lp_encoding;
	char buf[255];
	pcell_t refpoints[3];
	pcell_t prop;
	phandle_t cpu;
	phandle_t opal;
	int res, len, idx;
	register_t msr;
	bool has_lp;

	/* Ping OPAL again just to make sure */
	opal_check();

	/* Ask firmware to bring secondaries to the kernel's byte order. */
#if BYTE_ORDER == LITTLE_ENDIAN
	opal_call(OPAL_REINIT_CPUS, 2 /* Little endian */);
#else
	opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */);
#endif
	opal = OF_finddevice("/ibm,opal");

	platform_associativity = 4; /* Skiboot default. */
	if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints,
	    sizeof(refpoints)) > 0) {
		/* First reference point is the most significant domain level. */
		platform_associativity = refpoints[0];
	}

	if (cpu_idle_hook == NULL)
		cpu_idle_hook = powernv_cpu_idle;

	powernv_boot_pir = mfspr(SPR_PIR);

	/* LPID must not be altered when PSL_DR or PSL_IR is set */
	msr = mfmsr();
	mtmsr(msr & ~(PSL_DR | PSL_IR));

	/* Direct interrupts to SRR instead of HSRR and reset LPCR otherwise */
	mtspr(SPR_LPID, 0);
	isync();

	/*
	 * NOTE(review): `lpcr` is not declared in this file; presumably the
	 * kernel-wide LPCR image defined elsewhere (shared with AP bringup) —
	 * verify against machine/cpu.h / aim_machdep.c.
	 */
	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
		lpcr |= LPCR_HVICE;	/* ISA 3.0: hypervisor virt. interrupts */

#if BYTE_ORDER == LITTLE_ENDIAN
	lpcr |= LPCR_ILE;	/* take interrupts little-endian */
#endif

	mtspr(SPR_LPCR, lpcr);
	isync();

	/* Restore translation only after LPID/LPCR are in place. */
	mtmsr(msr);

	powernv_cpuref_init();

	/* Set SLB count from device tree */
	cpu = OF_peer(0);
	cpu = OF_child(cpu);
	while (cpu != 0) {
		res = OF_getprop(cpu, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		cpu = OF_peer(cpu);
	}
	if (cpu == 0)
		goto out;

	/* First node of device_type "cpu"; assumes all CPUs are identical. */
	cpu = OF_child(cpu);
	while (cpu != 0) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0)
			break;
		cpu = OF_peer(cpu);
	}
	if (cpu == 0)
		goto out;

	res = OF_getencprop(cpu, "ibm,slb-size", &prop, sizeof(prop));
	if (res > 0)
		n_slbs = prop;

	/*
	 * Scan the large page size property for PAPR compatible machines.
	 * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
	 * for the encoding of the property.
	 */

	len = OF_getproplen(cpu, "ibm,segment-page-sizes");
	if (len > 0) {
		/*
		 * We have to use a variable length array on the stack
		 * since we have very limited stack space.
		 */
		pcell_t arr[len/sizeof(cell_t)];
		res = OF_getencprop(cpu, "ibm,segment-page-sizes", arr,
		    sizeof(arr));
		len /= 4;	/* bytes -> cells (sizeof(cell_t) == 4) */
		idx = 0;
		has_lp = false;
		/*
		 * Property layout per base page size:
		 *   { shift, slb_encoding, nptlp, { lp_size, lp_encoding }* }
		 */
		while (len > 0) {
			shift = arr[idx];
			slb_encoding = arr[idx + 1];
			nptlp = arr[idx + 2];
			idx += 3;
			len -= 3;
			while (len > 0 && nptlp) {
				lp_size = arr[idx];
				lp_encoding = arr[idx+1];
				/* Standard large page: SLB[L]=1, PTE[LP]=0. */
				if (slb_encoding == SLBV_L && lp_encoding == 0)
					has_lp = true;

				/* 16M pages within a 4K base segment. */
				if (slb_encoding == SLB_PGSZ_4K_4K &&
				    lp_encoding == LP_4K_16M)
					moea64_has_lp_4k_16m = true;

				idx += 2;
				len -= 2;
				nptlp--;
			}
			if (has_lp && moea64_has_lp_4k_16m)
				break;
		}

		if (!has_lp)
			panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
			    "not supported by this system.");

		/* Export the last-seen geometry to the oea64 pmap. */
		moea64_large_page_shift = shift;
		moea64_large_page_size = 1ULL << lp_size;
	}

out:
	return (0);
}

/*
 * Return physical and available memory regions from the OFW device tree.
 */
void
powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz)
{

	ofw_mem_regions(phys, physsz, avail, availsz);
}

/*
 * Return NUMA-annotated memory regions from the OFW device tree.
 */
static void
powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys,
    int *physsz)
{

	ofw_numa_mem_regions(phys, physsz);
}

/*
 * Read the timebase frequency from the first CPU node's
 * "timebase-frequency" property.
 */
static u_long
powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)
{
	char buf[8];
	phandle_t cpu, dev, root;
	int res;
	int32_t ticks = -1;

	root = OF_peer(0);
	dev = OF_child(root);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0)
			break;
	}
	if (cpu == 0)
		return (512000000);	/* fallback: POWER default 512 MHz */

	OF_getencprop(cpu, "timebase-frequency", &ticks, sizeof(ticks));

	/* ticks stays -1 if the property is absent; 0 is equally unusable. */
	if (ticks <= 0)
		panic("Unable to determine timebase frequency!");

	return (ticks);

}

/*
 * Build platform_cpuref[] from the device tree: one entry per hardware
 * thread listed in "ibm,ppc-interrupt-server#s" of each enabled cpu node.
 * The table is then rotated so the boot CPU (matched by PIR) gets cpuid 0.
 * Idempotent; subsequent calls are no-ops.
 */
static int
powernv_cpuref_init(void)
{
	phandle_t cpu, dev;
	char buf[32];
	int a, res, tmp_cpuref_cnt;
	/* static: too large for the early boot stack. */
	static struct cpuref tmp_cpuref[MAXCPU];
	cell_t interrupt_servers[32];
	uint64_t bsp;

	if (platform_cpuref_valid)
		return (0);

	dev = OF_peer(0);
	dev = OF_child(dev);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	bsp = 0;
	tmp_cpuref_cnt = 0;
	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0) {
			/* Skip nodes firmware marked disabled. */
			if (!ofw_bus_node_status_okay(cpu))
				continue;
			res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
			if (res > 0) {
				OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
				    interrupt_servers, res);

				/* One table entry per thread of this core. */
				for (a = 0; a < res/sizeof(cell_t); a++) {
					tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
					tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
					tmp_cpuref[tmp_cpuref_cnt].cr_domain =
					    powernv_node_numa_domain(NULL, cpu);
					if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)
						bsp = tmp_cpuref_cnt;

					tmp_cpuref_cnt++;
				}
			}
		}
	}

	/* Map IDs, so BSP has CPUID 0 regardless of hwref */
	for (a = bsp; a < tmp_cpuref_cnt; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
		platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
		platform_cpuref_cnt++;
	}
	for (a = 0; a < bsp; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
		platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
		platform_cpuref_cnt++;
	}

	platform_cpuref_valid = 1;

	return (0);
}

/*
 * platform_smp_first_cpu: start CPU iteration at logical CPU 0 (the BSP).
 */
static int
powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
{
	if (platform_cpuref_valid == 0)
		return (EINVAL);

	cpuref->cr_cpuid = 0;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
	cpuref->cr_domain = platform_cpuref[0].cr_domain;

	return (0);
}

/*
 * platform_smp_next_cpu: advance to the next table entry; ENOENT at end.
 */
static int
powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
{
	int id;

	if (platform_cpuref_valid == 0)
		return (EINVAL);

	id = cpuref->cr_cpuid + 1;
	if (id >= platform_cpuref_cnt)
		return (ENOENT);

	cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
	cpuref->cr_domain = platform_cpuref[id].cr_domain;

	return (0);
}

/*
 * platform_smp_get_bsp: the BSP is entry 0 by construction (see
 * powernv_cpuref_init).
 */
static int
powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
{

	cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
	cpuref->cr_domain = platform_cpuref[0].cr_domain;
	return (0);
}

#ifdef SMP
/*
 * Start an AP: publish its pcpu pointer for the reset-vector trampoline,
 * then ask OPAL to release the thread into the kernel at EXC_RST.
 */
static int
powernv_smp_start_cpu(platform_t plat, struct pcpu *pc)
{
	int result;

	ap_pcpu = pc;
	powerpc_sync();	/* make ap_pcpu visible before the AP runs */

	result = opal_call(OPAL_START_CPU, pc->pc_hwref, EXC_RST);
	if (result != OPAL_SUCCESS) {
		printf("OPAL error (%d): unable to start AP %d\n",
		    result, (int)pc->pc_hwref);
		return (ENXIO);
	}

	return (0);
}

/*
 * Determine threads-per-core from the first cpu node's interrupt-server
 * list length; derive mp_ncores when the thread count divides mp_ncpus.
 */
static void
powernv_smp_probe_threads(platform_t plat)
{
	char buf[8];
	phandle_t cpu, dev, root;
	int res, nthreads;

	root = OF_peer(0);

	dev = OF_child(root);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	nthreads = 1;
	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res <= 0 || strcmp(buf, "cpu") != 0)
			continue;

		res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");

		/*
		 * NOTE(review): res == 0 (property present but empty) would
		 * make nthreads 0 and the modulo below divide by zero;
		 * assumes firmware never emits an empty property — verify.
		 */
		if (res >= 0)
			nthreads = res / sizeof(cell_t);
		else
			nthreads = 1;
		break;
	}

	smp_threads_per_core = nthreads;
	if (mp_ncpus % nthreads == 0)
		mp_ncores = mp_ncpus / nthreads;
}

/*
 * Fill in one cpu_group level and allocate its children.  Returns the
 * (contiguous) child array, or NULL when children == 0.
 */
static struct cpu_group *
cpu_group_init(struct cpu_group *group, struct cpu_group *parent,
    const cpuset_t *cpus, int children, int level, int flags)
{
	struct cpu_group *child;

	child = children != 0 ? smp_topo_alloc(children) : NULL;

	group->cg_parent = parent;
	group->cg_child = child;
	CPU_COPY(cpus, &group->cg_mask);
	group->cg_count = CPU_COUNT(cpus);
	group->cg_children = children;
	group->cg_level = level;
	group->cg_flags = flags;

	return (child);
}

/*
 * Build the scheduler topology: root -> NUMA domains -> cores (CG_SHARE_L3)
 * -> SMT thread groups (CG_SHARE_L1).  Falls back to a flat topology when
 * mp_ncpus is not a multiple of smp_threads_per_core.
 */
static struct cpu_group *
powernv_smp_topo(platform_t plat)
{
	struct cpu_group *core, *dom, *root;
	cpuset_t corecpus, domcpus;
	int cpuid, i, j, k, ncores;

	if (mp_ncpus % smp_threads_per_core != 0) {
		printf("%s: irregular SMP topology (%d threads, %d per core)\n",
		    __func__, mp_ncpus, smp_threads_per_core);
		return (smp_topo_none());
	}

	root = smp_topo_alloc(1);
	dom = cpu_group_init(root, NULL, &all_cpus, vm_ndomains, CG_SHARE_NONE,
	    0);

	/*
	 * Redundant layers will be collapsed by the caller so we don't need a
	 * special case for a single domain.
	 */
	for (i = 0; i < vm_ndomains; i++, dom++) {
		CPU_COPY(&cpuset_domain[i], &domcpus);
		ncores = CPU_COUNT(&domcpus) / smp_threads_per_core;
		KASSERT(CPU_COUNT(&domcpus) % smp_threads_per_core == 0,
		    ("%s: domain %d core count not divisible by thread count",
		    __func__, i));

		core = cpu_group_init(dom, root, &domcpus, ncores, CG_SHARE_L3,
		    0);
		for (j = 0; j < ncores; j++, core++) {
			/*
			 * Assume that consecutive CPU IDs correspond to sibling
			 * threads.
			 */
			CPU_ZERO(&corecpus);
			for (k = 0; k < smp_threads_per_core; k++) {
				/* Pop the lowest remaining CPU in the domain. */
				cpuid = CPU_FFS(&domcpus) - 1;
				CPU_CLR(cpuid, &domcpus);
				CPU_SET(cpuid, &corecpus);
			}
			(void)cpu_group_init(core, dom, &corecpus, 0,
			    CG_SHARE_L1, CG_FLAG_SMT);
		}
	}

	return (root);
}

#endif

/*
 * platform_reset: reboot the machine via OPAL.
 */
static void
powernv_reset(platform_t platform)
{

	opal_call(OPAL_CEC_REBOOT);
}

/*
 * Per-AP init hook; chains to an optional extra handler (see
 * powernv_smp_ap_extra_init above).
 */
static void
powernv_smp_ap_init(platform_t platform)
{

	if (powernv_smp_ap_extra_init != NULL)
		powernv_smp_ap_extra_init();
}

/* Idle hook: intentionally a no-op (spin in the idle loop). */
static void
powernv_cpu_idle(sbintime_t sbt)
{
}

/*
 * Map a device-tree node to a small dense NUMA domain id by looking up the
 * cell at index platform_associativity of its "ibm,associativity" property
 * in a lazily-grown translation table.  Walks up to the parent node when
 * the property is missing or too short; returns domain 0 as the fallback.
 */
static int
powernv_node_numa_domain(platform_t platform, phandle_t node)
{
	/* XXX: Is locking necessary in here? */
	static int numa_domains[MAXMEMDOM];
	static int numa_max_domain;
	cell_t associativity[5];
	int i, res;

#ifndef NUMA
	/* Non-NUMA kernels: everything lives in domain 0. */
	return (0);
#endif
	i = 0;
	TUNABLE_INT_FETCH("vm.numa.disabled", &i);
	if (i)
		return (0);

	res = OF_getencprop(node, "ibm,associativity",
	    associativity, sizeof(associativity));

	/*
	 * If this node doesn't have associativity, or if there are not
	 * enough elements in it, check its parent.
	 */
	if (res < (int)(sizeof(cell_t) * (platform_associativity + 1))) {
		node = OF_parent(node);
		/* If already at the root, use default domain. */
		if (node == 0)
			return (0);
		return (powernv_node_numa_domain(platform, node));
	}

	/* Already-seen associativity value: reuse its dense id. */
	for (i = 0; i < numa_max_domain; i++) {
		if (numa_domains[i] == associativity[platform_associativity])
			return (i);
	}
	/* New value: register it, or fall back to 0 if the table is full. */
	if (i < MAXMEMDOM)
		numa_domains[numa_max_domain++] =
		    associativity[platform_associativity];
	else
		i = 0;

	return (i);
}

/* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */
static void
powernv_setup_nmmu(void *unused)
{
	if (opal_check() != 0)
		return;
	/* Share the host page-table control register (PTCR) with the NMMU. */
	opal_call(OPAL_NMMU_SET_PTCR, -1, mfspr(SPR_PTCR));
}

SYSINIT(powernv_setup_nmmu, SI_SUB_CPU, SI_ORDER_ANY, powernv_setup_nmmu, NULL);