/*-
 * Copyright (c) 2015 Nathan Whitehorn
 * Copyright (c) 2017-2018 Semihalf
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/cpu.h>
#include <machine/hid.h>
#include <machine/platformvar.h>
#include <machine/pmap.h>
#include <machine/rtas.h>
#include <machine/smp.h>
#include <machine/spr.h>
#include <machine/trap.h>

#include <dev/ofw/openfirm.h>
#include <machine/ofw_machdep.h>
#include <powerpc/aim/mmu_oea64.h>

#include "platform_if.h"
#include "opal.h"

#ifdef SMP
extern void *ap_pcpu;
#endif

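/*
 * Optional hook run on each AP from powernv_smp_ap_init().  Other powernv
 * code may install a function here to perform additional per-CPU setup;
 * it is left NULL when no extra initialization is required.
 */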
void (*powernv_smp_ap_extra_init)(void);

static int powernv_probe(platform_t);
static int powernv_attach(platform_t);
void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz);
static void powernv_numa_mem_regions(platform_t plat,
    struct numa_mem_region *phys, int *physsz);
static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);
static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);
static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);
static int powernv_smp_get_bsp(platform_t, struct cpuref *cpuref);
static void powernv_smp_ap_init(platform_t);
#ifdef SMP
static int powernv_smp_start_cpu(platform_t, struct pcpu *cpu);
static void powernv_smp_probe_threads(platform_t);
static struct cpu_group *powernv_smp_topo(platform_t plat);
#endif
static void powernv_reset(platform_t);
static void powernv_cpu_idle(sbintime_t sbt);
static int powernv_cpuref_init(void);
static int powernv_node_numa_domain(platform_t platform, phandle_t node);

static platform_method_t powernv_methods[] = {
	PLATFORMMETHOD(platform_probe, powernv_probe),
	PLATFORMMETHOD(platform_attach, powernv_attach),
	PLATFORMMETHOD(platform_mem_regions, powernv_mem_regions),
	PLATFORMMETHOD(platform_numa_mem_regions, powernv_numa_mem_regions),
	PLATFORMMETHOD(platform_timebase_freq, powernv_timebase_freq),

	PLATFORMMETHOD(platform_smp_ap_init, powernv_smp_ap_init),
	PLATFORMMETHOD(platform_smp_first_cpu, powernv_smp_first_cpu),
	PLATFORMMETHOD(platform_smp_next_cpu, powernv_smp_next_cpu),
	PLATFORMMETHOD(platform_smp_get_bsp, powernv_smp_get_bsp),
#ifdef SMP
	PLATFORMMETHOD(platform_smp_start_cpu, powernv_smp_start_cpu),
	PLATFORMMETHOD(platform_smp_probe_threads, powernv_smp_probe_threads),
	PLATFORMMETHOD(platform_smp_topo, powernv_smp_topo),
#endif
	PLATFORMMETHOD(platform_node_numa_domain, powernv_node_numa_domain),

	PLATFORMMETHOD(platform_reset, powernv_reset),

	{ 0, 0 }
};

static platform_def_t powernv_platform = {
	"powernv",
	powernv_methods,
	0
};

static struct cpuref platform_cpuref[MAXCPU];
static int platform_cpuref_cnt;
static int platform_cpuref_valid;
static int platform_associativity;

PLATFORM_DEF(powernv_platform);

static uint64_t powernv_boot_pir;

static int
powernv_probe(platform_t plat)
{
	if (opal_check() == 0)
		return (BUS_PROBE_SPECIFIC);

	return (ENXIO);
}

static int
powernv_attach(platform_t plat)
{
	uint32_t nptlp, shift = 0, slb_encoding = 0;
	int32_t lp_size, lp_encoding;
	char buf[255];
	pcell_t refpoints[3];
	pcell_t prop;
	phandle_t cpu;
	phandle_t opal;
	int res, len, idx;
	register_t msr;

	/* Ping OPAL again just to make sure */
	opal_check();

#if BYTE_ORDER == LITTLE_ENDIAN
	opal_call(OPAL_REINIT_CPUS, 2 /* Little endian */);
#else
	opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */);
#endif
	opal = OF_finddevice("/ibm,opal");

	platform_associativity = 4; /* Skiboot default. */
	if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints,
	    sizeof(refpoints)) > 0) {
		platform_associativity = refpoints[0];
	}

	if (cpu_idle_hook == NULL)
		cpu_idle_hook = powernv_cpu_idle;

	powernv_boot_pir = mfspr(SPR_PIR);

	/* LPID must not be altered when PSL_DR or PSL_IR is set */
	msr = mfmsr();
	mtmsr(msr & ~(PSL_DR | PSL_IR));

	/* Direct interrupts to SRR instead of HSRR and reset LPCR otherwise */
	mtspr(SPR_LPID, 0);
	isync();

	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
		lpcr |= LPCR_HVICE;

	mtspr(SPR_LPCR, lpcr);
	isync();

	mtmsr(msr);

	powernv_cpuref_init();

	/* Set SLB count from device tree */
	cpu = OF_peer(0);
	cpu = OF_child(cpu);
	while (cpu != 0) {
		res = OF_getprop(cpu, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		cpu = OF_peer(cpu);
	}
	if (cpu == 0)
		goto out;

	cpu = OF_child(cpu);
	while (cpu != 0) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0)
			break;
		cpu = OF_peer(cpu);
	}
	if (cpu == 0)
		goto out;

	res = OF_getencprop(cpu, "ibm,slb-size", &prop, sizeof(prop));
	if (res > 0)
		n_slbs = prop;

	/*
	 * Scan the large page size property for PAPR compatible machines.
	 * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
	 * for the encoding of the property.
	 */

	len = OF_getproplen(cpu, "ibm,segment-page-sizes");
	if (len > 0) {
		/*
		 * We have to use a variable length array on the stack
		 * since we have very limited stack space.
		 */
		pcell_t arr[len/sizeof(cell_t)];
		res = OF_getencprop(cpu, "ibm,segment-page-sizes", arr,
		    sizeof(arr));
		len /= 4;
		idx = 0;
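		/*
		 * Per the PAPR encoding referenced above, each entry in the
		 * property is laid out as:
		 *   { segment base page shift, SLB encoding, N,
		 *     N x { page shift, PTE LP encoding } }
		 * Walk the entries looking for the one describing standard
		 * large pages (SLB[L] = 1 with an LP encoding of 0).
		 */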
		while (len > 0) {
			shift = arr[idx];
			slb_encoding = arr[idx + 1];
			nptlp = arr[idx + 2];
			idx += 3;
			len -= 3;
			while (len > 0 && nptlp) {
				lp_size = arr[idx];
				lp_encoding = arr[idx+1];
				if (slb_encoding == SLBV_L && lp_encoding == 0)
					break;

				idx += 2;
				len -= 2;
				nptlp--;
			}
			if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0)
				break;
		}

		if (len == 0)
			panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
			    "not supported by this system.");

		moea64_large_page_shift = shift;
		moea64_large_page_size = 1ULL << lp_size;
	}

out:
	return (0);
}

void
powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
    struct mem_region *avail, int *availsz)
{

	ofw_mem_regions(phys, physsz, avail, availsz);
}

static void
powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys,
    int *physsz)
{

	ofw_numa_mem_regions(phys, physsz);
}

static u_long
powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)
{
	char buf[8];
	phandle_t cpu, dev, root;
	int res;
	int32_t ticks = -1;

	root = OF_peer(0);
	dev = OF_child(root);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0)
			break;
	}
	if (cpu == 0)
		return (512000000);

	OF_getencprop(cpu, "timebase-frequency", &ticks, sizeof(ticks));

	if (ticks <= 0)
		panic("Unable to determine timebase frequency!");

	return (ticks);
}

static int
powernv_cpuref_init(void)
{
	phandle_t cpu, dev;
	char buf[32];
	int a, res, tmp_cpuref_cnt;
	static struct cpuref tmp_cpuref[MAXCPU];
	cell_t interrupt_servers[32];
	uint64_t bsp;

	if (platform_cpuref_valid)
		return (0);

	dev = OF_peer(0);
	dev = OF_child(dev);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	bsp = 0;
	tmp_cpuref_cnt = 0;
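	/*
	 * Each CPU core node carries an "ibm,ppc-interrupt-server#s"
	 * property with one entry per hardware thread.  Record every thread
	 * as a separate cpuref and note which one matches the PIR we booted
	 * on, so the list can be rotated below.
	 */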
	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpu") == 0) {
			res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
			if (res > 0) {
				OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
				    interrupt_servers, res);

				for (a = 0; a < res/sizeof(cell_t); a++) {
					tmp_cpuref[tmp_cpuref_cnt].cr_hwref =
					    interrupt_servers[a];
					tmp_cpuref[tmp_cpuref_cnt].cr_cpuid =
					    tmp_cpuref_cnt;
					tmp_cpuref[tmp_cpuref_cnt].cr_domain =
					    powernv_node_numa_domain(NULL, cpu);
					if (interrupt_servers[a] ==
					    (uint32_t)powernv_boot_pir)
						bsp = tmp_cpuref_cnt;

					tmp_cpuref_cnt++;
				}
			}
		}
	}

	/* Map IDs, so BSP has CPUID 0 regardless of hwref */
	for (a = bsp; a < tmp_cpuref_cnt; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref =
		    tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid =
		    platform_cpuref_cnt;
		platform_cpuref[platform_cpuref_cnt].cr_domain =
		    tmp_cpuref[a].cr_domain;
		platform_cpuref_cnt++;
	}
	for (a = 0; a < bsp; a++) {
		platform_cpuref[platform_cpuref_cnt].cr_hwref =
		    tmp_cpuref[a].cr_hwref;
		platform_cpuref[platform_cpuref_cnt].cr_cpuid =
		    platform_cpuref_cnt;
		platform_cpuref[platform_cpuref_cnt].cr_domain =
		    tmp_cpuref[a].cr_domain;
		platform_cpuref_cnt++;
	}

	platform_cpuref_valid = 1;

	return (0);
}

static int
powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
{
	if (platform_cpuref_valid == 0)
		return (EINVAL);

	cpuref->cr_cpuid = 0;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
	cpuref->cr_domain = platform_cpuref[0].cr_domain;

	return (0);
}

static int
powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
{
	int id;

	if (platform_cpuref_valid == 0)
		return (EINVAL);

	id = cpuref->cr_cpuid + 1;
	if (id >= platform_cpuref_cnt)
		return (ENOENT);

	cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
	cpuref->cr_domain = platform_cpuref[id].cr_domain;

	return (0);
}

static int
powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
{

	cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
	cpuref->cr_domain = platform_cpuref[0].cr_domain;
	return (0);
}

#ifdef SMP
static int
powernv_smp_start_cpu(platform_t plat, struct pcpu *pc)
{
	int result;

	ap_pcpu = pc;
	powerpc_sync();

	result = opal_call(OPAL_START_CPU, pc->pc_hwref, EXC_RST);
	if (result != OPAL_SUCCESS) {
		printf("OPAL error (%d): unable to start AP %d\n",
		    result, (int)pc->pc_hwref);
		return (ENXIO);
	}

	return (0);
}

static void
powernv_smp_probe_threads(platform_t plat)
{
	char buf[8];
	phandle_t cpu, dev, root;
	int res, nthreads;

	root = OF_peer(0);

	dev = OF_child(root);
	while (dev != 0) {
		res = OF_getprop(dev, "name", buf, sizeof(buf));
		if (res > 0 && strcmp(buf, "cpus") == 0)
			break;
		dev = OF_peer(dev);
	}

	nthreads = 1;
	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
		if (res <= 0 || strcmp(buf, "cpu") != 0)
			continue;

		res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");

		if (res >= 0)
			nthreads = res / sizeof(cell_t);
		else
			nthreads = 1;
		break;
	}

	smp_threads_per_core = nthreads;
	if (mp_ncpus % nthreads == 0)
		mp_ncores = mp_ncpus / nthreads;
}

static struct cpu_group *
powernv_smp_topo(platform_t plat)
{
	if (mp_ncpus % smp_threads_per_core != 0) {
		printf("WARNING: Irregular SMP topology. "
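		    "Performance may be suboptimal "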
		    "(%d threads, %d on first core)\n",
		    mp_ncpus, smp_threads_per_core);
		return (smp_topo_none());
	}

	/* Don't do anything fancier for non-threaded SMP */
	if (smp_threads_per_core == 1)
		return (smp_topo_none());

	return (smp_topo_1level(CG_SHARE_L1, smp_threads_per_core,
	    CG_FLAG_SMT));
}

#endif

static void
powernv_reset(platform_t platform)
{

	opal_call(OPAL_CEC_REBOOT);
}

static void
powernv_smp_ap_init(platform_t platform)
{

	if (powernv_smp_ap_extra_init != NULL)
		powernv_smp_ap_extra_init();
}

static void
powernv_cpu_idle(sbintime_t sbt)
{
}

/*
 * Map a device tree node to a VM domain by looking at its "ibm,associativity"
 * property.  The cell selected by platform_associativity (taken from
 * "ibm,associativity-reference-points" in powernv_attach()) identifies the
 * node's NUMA domain; distinct values are assigned small, dense domain IDs
 * as they are first seen.
 */
static int
powernv_node_numa_domain(platform_t platform, phandle_t node)
{
	/* XXX: Is locking necessary in here? */
	static int numa_domains[MAXMEMDOM];
	static int numa_max_domain;
	cell_t associativity[5];
	int i, res;

#ifndef NUMA
	return (0);
#endif
	if (vm_ndomains == 1)
		return (0);

	res = OF_getencprop(node, "ibm,associativity",
	    associativity, sizeof(associativity));

	/*
	 * If this node doesn't have associativity, or if there are not
	 * enough elements in it, check its parent.
	 */
	if (res < (int)(sizeof(cell_t) * (platform_associativity + 1))) {
		node = OF_parent(node);
		/* If already at the root, use default domain. */
		if (node == 0)
			return (0);
		return (powernv_node_numa_domain(platform, node));
	}

	for (i = 0; i < numa_max_domain; i++) {
		if (numa_domains[i] == associativity[platform_associativity])
			return (i);
	}
	if (i < MAXMEMDOM)
		numa_domains[numa_max_domain++] =
		    associativity[platform_associativity];
	else
		i = 0;

	return (i);
}

/* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */
static void
powernv_setup_nmmu(void *unused)
{
	if (opal_check() != 0)
		return;
	opal_call(OPAL_NMMU_SET_PTCR, -1, mfspr(SPR_PTCR));
}

SYSINIT(powernv_setup_nmmu, SI_SUB_CPU, SI_ORDER_ANY, powernv_setup_nmmu, NULL);