xref: /freebsd/sys/powerpc/powernv/platform_powernv.c (revision a3d9bf49b57923118c339642594246ef73872ee8)
1 /*-
2  * Copyright (c) 2015 Nathan Whitehorn
3  * Copyright (c) 2017-2018 Semihalf
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/bus.h>
35 #include <sys/pcpu.h>
36 #include <sys/proc.h>
37 #include <sys/smp.h>
38 #include <vm/vm.h>
39 #include <vm/pmap.h>
40 
41 #include <machine/bus.h>
42 #include <machine/cpu.h>
43 #include <machine/hid.h>
44 #include <machine/platformvar.h>
45 #include <machine/pmap.h>
46 #include <machine/rtas.h>
47 #include <machine/smp.h>
48 #include <machine/spr.h>
49 #include <machine/trap.h>
50 
51 #include <dev/ofw/openfirm.h>
52 #include <dev/ofw/ofw_bus.h>
53 #include <dev/ofw/ofw_bus_subr.h>
54 #include <machine/ofw_machdep.h>
55 #include <powerpc/aim/mmu_oea64.h>
56 
57 #include "platform_if.h"
58 #include "opal.h"
59 
60 #ifdef SMP
61 extern void *ap_pcpu;
62 #endif
63 
64 void (*powernv_smp_ap_extra_init)(void);
65 
66 static int powernv_probe(platform_t);
67 static int powernv_attach(platform_t);
68 void powernv_mem_regions(platform_t, struct mem_region *phys, int *physsz,
69     struct mem_region *avail, int *availsz);
70 static void powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz);
71 static u_long powernv_timebase_freq(platform_t, struct cpuref *cpuref);
72 static int powernv_smp_first_cpu(platform_t, struct cpuref *cpuref);
73 static int powernv_smp_next_cpu(platform_t, struct cpuref *cpuref);
74 static int powernv_smp_get_bsp(platform_t, struct cpuref *cpuref);
75 static void powernv_smp_ap_init(platform_t);
76 #ifdef SMP
77 static int powernv_smp_start_cpu(platform_t, struct pcpu *cpu);
78 static void powernv_smp_probe_threads(platform_t);
79 static struct cpu_group *powernv_smp_topo(platform_t plat);
80 #endif
81 static void powernv_reset(platform_t);
82 static void powernv_cpu_idle(sbintime_t sbt);
83 static int powernv_cpuref_init(void);
84 static int powernv_node_numa_domain(platform_t platform, phandle_t node);
85 
86 static platform_method_t powernv_methods[] = {
87 	PLATFORMMETHOD(platform_probe, 		powernv_probe),
88 	PLATFORMMETHOD(platform_attach,		powernv_attach),
89 	PLATFORMMETHOD(platform_mem_regions,	powernv_mem_regions),
90 	PLATFORMMETHOD(platform_numa_mem_regions,	powernv_numa_mem_regions),
91 	PLATFORMMETHOD(platform_timebase_freq,	powernv_timebase_freq),
92 
93 	PLATFORMMETHOD(platform_smp_ap_init,	powernv_smp_ap_init),
94 	PLATFORMMETHOD(platform_smp_first_cpu,	powernv_smp_first_cpu),
95 	PLATFORMMETHOD(platform_smp_next_cpu,	powernv_smp_next_cpu),
96 	PLATFORMMETHOD(platform_smp_get_bsp,	powernv_smp_get_bsp),
97 #ifdef SMP
98 	PLATFORMMETHOD(platform_smp_start_cpu,	powernv_smp_start_cpu),
99 	PLATFORMMETHOD(platform_smp_probe_threads,	powernv_smp_probe_threads),
100 	PLATFORMMETHOD(platform_smp_topo,	powernv_smp_topo),
101 #endif
102 	PLATFORMMETHOD(platform_node_numa_domain,	powernv_node_numa_domain),
103 
104 	PLATFORMMETHOD(platform_reset,		powernv_reset),
105 	{ 0, 0 }
106 };
107 
108 static platform_def_t powernv_platform = {
109 	"powernv",
110 	powernv_methods,
111 	0
112 };
113 
114 static struct cpuref platform_cpuref[MAXCPU];
115 static int platform_cpuref_cnt;
116 static int platform_cpuref_valid;
117 static int platform_associativity;
118 
119 PLATFORM_DEF(powernv_platform);
120 
121 static uint64_t powernv_boot_pir;
122 
123 static int
124 powernv_probe(platform_t plat)
125 {
126 	if (opal_check() == 0)
127 		return (BUS_PROBE_SPECIFIC);
128 
129 	return (ENXIO);
130 }
131 
132 static int
133 powernv_attach(platform_t plat)
134 {
135 	uint32_t nptlp, shift = 0, slb_encoding = 0;
136 	int32_t lp_size, lp_encoding;
137 	char buf[255];
138 	pcell_t refpoints[3];
139 	pcell_t prop;
140 	phandle_t cpu;
141 	phandle_t opal;
142 	int res, len, idx;
143 	register_t msr;
144 
145 	/* Ping OPAL again just to make sure */
146 	opal_check();
147 
148 #if BYTE_ORDER == LITTLE_ENDIAN
149 	opal_call(OPAL_REINIT_CPUS, 2 /* Little endian */);
150 #else
151 	opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */);
152 #endif
153 	opal = OF_finddevice("/ibm,opal");
154 
155 	platform_associativity = 4; /* Skiboot default. */
156 	if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints,
157 	    sizeof(refpoints)) > 0) {
158 		platform_associativity = refpoints[0];
159 	}
160 
161        if (cpu_idle_hook == NULL)
162                 cpu_idle_hook = powernv_cpu_idle;
163 
164 	powernv_boot_pir = mfspr(SPR_PIR);
165 
166 	/* LPID must not be altered when PSL_DR or PSL_IR is set */
167 	msr = mfmsr();
168 	mtmsr(msr & ~(PSL_DR | PSL_IR));
169 
170 	/* Direct interrupts to SRR instead of HSRR and reset LPCR otherwise */
171 	mtspr(SPR_LPID, 0);
172 	isync();
173 
174 	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00)
175 		lpcr |= LPCR_HVICE;
176 
177 #if BYTE_ORDER == LITTLE_ENDIAN
178 	lpcr |= LPCR_ILE;
179 #endif
180 
181 	mtspr(SPR_LPCR, lpcr);
182 	isync();
183 
184 	mtmsr(msr);
185 
186 	powernv_cpuref_init();
187 
188 	/* Set SLB count from device tree */
189 	cpu = OF_peer(0);
190 	cpu = OF_child(cpu);
191 	while (cpu != 0) {
192 		res = OF_getprop(cpu, "name", buf, sizeof(buf));
193 		if (res > 0 && strcmp(buf, "cpus") == 0)
194 			break;
195 		cpu = OF_peer(cpu);
196 	}
197 	if (cpu == 0)
198 		goto out;
199 
200 	cpu = OF_child(cpu);
201 	while (cpu != 0) {
202 		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
203 		if (res > 0 && strcmp(buf, "cpu") == 0)
204 			break;
205 		cpu = OF_peer(cpu);
206 	}
207 	if (cpu == 0)
208 		goto out;
209 
210 	res = OF_getencprop(cpu, "ibm,slb-size", &prop, sizeof(prop));
211 	if (res > 0)
212 		n_slbs = prop;
213 
214 	/*
215 	 * Scan the large page size property for PAPR compatible machines.
216 	 * See PAPR D.5 Changes to Section 5.1.4, 'CPU Node Properties'
217 	 * for the encoding of the property.
218 	 */
219 
220 	len = OF_getproplen(cpu, "ibm,segment-page-sizes");
221 	if (len > 0) {
222 		/*
223 		 * We have to use a variable length array on the stack
224 		 * since we have very limited stack space.
225 		 */
226 		pcell_t arr[len/sizeof(cell_t)];
227 		res = OF_getencprop(cpu, "ibm,segment-page-sizes", arr,
228 		    sizeof(arr));
229 		len /= 4;
230 		idx = 0;
231 		while (len > 0) {
232 			shift = arr[idx];
233 			slb_encoding = arr[idx + 1];
234 			nptlp = arr[idx + 2];
235 			idx += 3;
236 			len -= 3;
237 			while (len > 0 && nptlp) {
238 				lp_size = arr[idx];
239 				lp_encoding = arr[idx+1];
240 				if (slb_encoding == SLBV_L && lp_encoding == 0)
241 					break;
242 
243 				idx += 2;
244 				len -= 2;
245 				nptlp--;
246 			}
247 			if (nptlp && slb_encoding == SLBV_L && lp_encoding == 0)
248 				break;
249 		}
250 
251 		if (len == 0)
252 			panic("Standard large pages (SLB[L] = 1, PTE[LP] = 0) "
253 			    "not supported by this system.");
254 
255 		moea64_large_page_shift = shift;
256 		moea64_large_page_size = 1ULL << lp_size;
257 	}
258 
259 out:
260 	return (0);
261 }
262 
263 void
264 powernv_mem_regions(platform_t plat, struct mem_region *phys, int *physsz,
265     struct mem_region *avail, int *availsz)
266 {
267 
268 	ofw_mem_regions(phys, physsz, avail, availsz);
269 }
270 
271 static void
272 powernv_numa_mem_regions(platform_t plat, struct numa_mem_region *phys, int *physsz)
273 {
274 
275 	ofw_numa_mem_regions(phys, physsz);
276 }
277 
278 static u_long
279 powernv_timebase_freq(platform_t plat, struct cpuref *cpuref)
280 {
281 	char buf[8];
282 	phandle_t cpu, dev, root;
283 	int res;
284 	int32_t ticks = -1;
285 
286 	root = OF_peer(0);
287 	dev = OF_child(root);
288 	while (dev != 0) {
289 		res = OF_getprop(dev, "name", buf, sizeof(buf));
290 		if (res > 0 && strcmp(buf, "cpus") == 0)
291 			break;
292 		dev = OF_peer(dev);
293 	}
294 
295 	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
296 		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
297 		if (res > 0 && strcmp(buf, "cpu") == 0)
298 			break;
299 	}
300 	if (cpu == 0)
301 		return (512000000);
302 
303 	OF_getencprop(cpu, "timebase-frequency", &ticks, sizeof(ticks));
304 
305 	if (ticks <= 0)
306 		panic("Unable to determine timebase frequency!");
307 
308 	return (ticks);
309 
310 }
311 
312 static int
313 powernv_cpuref_init(void)
314 {
315 	phandle_t cpu, dev;
316 	char buf[32];
317 	int a, res, tmp_cpuref_cnt;
318 	static struct cpuref tmp_cpuref[MAXCPU];
319 	cell_t interrupt_servers[32];
320 	uint64_t bsp;
321 
322 	if (platform_cpuref_valid)
323 		return (0);
324 
325 	dev = OF_peer(0);
326 	dev = OF_child(dev);
327 	while (dev != 0) {
328 		res = OF_getprop(dev, "name", buf, sizeof(buf));
329 		if (res > 0 && strcmp(buf, "cpus") == 0)
330 			break;
331 		dev = OF_peer(dev);
332 	}
333 
334 	bsp = 0;
335 	tmp_cpuref_cnt = 0;
336 	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
337 		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
338 		if (res > 0 && strcmp(buf, "cpu") == 0) {
339 			if (!ofw_bus_node_status_okay(cpu))
340 				continue;
341 			res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
342 			if (res > 0) {
343 				OF_getencprop(cpu, "ibm,ppc-interrupt-server#s",
344 				    interrupt_servers, res);
345 
346 				for (a = 0; a < res/sizeof(cell_t); a++) {
347 					tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
348 					tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
349 					tmp_cpuref[tmp_cpuref_cnt].cr_domain =
350 					    powernv_node_numa_domain(NULL, cpu);
351 					if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)
352 						bsp = tmp_cpuref_cnt;
353 
354 					tmp_cpuref_cnt++;
355 				}
356 			}
357 		}
358 	}
359 
360 	/* Map IDs, so BSP has CPUID 0 regardless of hwref */
361 	for (a = bsp; a < tmp_cpuref_cnt; a++) {
362 		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
363 		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
364 		platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
365 		platform_cpuref_cnt++;
366 	}
367 	for (a = 0; a < bsp; a++) {
368 		platform_cpuref[platform_cpuref_cnt].cr_hwref = tmp_cpuref[a].cr_hwref;
369 		platform_cpuref[platform_cpuref_cnt].cr_cpuid = platform_cpuref_cnt;
370 		platform_cpuref[platform_cpuref_cnt].cr_domain = tmp_cpuref[a].cr_domain;
371 		platform_cpuref_cnt++;
372 	}
373 
374 	platform_cpuref_valid = 1;
375 
376 	return (0);
377 }
378 
379 static int
380 powernv_smp_first_cpu(platform_t plat, struct cpuref *cpuref)
381 {
382 	if (platform_cpuref_valid == 0)
383 		return (EINVAL);
384 
385 	cpuref->cr_cpuid = 0;
386 	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
387 	cpuref->cr_domain = platform_cpuref[0].cr_domain;
388 
389 	return (0);
390 }
391 
392 static int
393 powernv_smp_next_cpu(platform_t plat, struct cpuref *cpuref)
394 {
395 	int id;
396 
397 	if (platform_cpuref_valid == 0)
398 		return (EINVAL);
399 
400 	id = cpuref->cr_cpuid + 1;
401 	if (id >= platform_cpuref_cnt)
402 		return (ENOENT);
403 
404 	cpuref->cr_cpuid = platform_cpuref[id].cr_cpuid;
405 	cpuref->cr_hwref = platform_cpuref[id].cr_hwref;
406 	cpuref->cr_domain = platform_cpuref[id].cr_domain;
407 
408 	return (0);
409 }
410 
411 static int
412 powernv_smp_get_bsp(platform_t plat, struct cpuref *cpuref)
413 {
414 
415 	cpuref->cr_cpuid = platform_cpuref[0].cr_cpuid;
416 	cpuref->cr_hwref = platform_cpuref[0].cr_hwref;
417 	cpuref->cr_domain = platform_cpuref[0].cr_domain;
418 	return (0);
419 }
420 
421 #ifdef SMP
422 static int
423 powernv_smp_start_cpu(platform_t plat, struct pcpu *pc)
424 {
425 	int result;
426 
427 	ap_pcpu = pc;
428 	powerpc_sync();
429 
430 	result = opal_call(OPAL_START_CPU, pc->pc_hwref, EXC_RST);
431 	if (result != OPAL_SUCCESS) {
432 		printf("OPAL error (%d): unable to start AP %d\n",
433 		    result, (int)pc->pc_hwref);
434 		return (ENXIO);
435 	}
436 
437 	return (0);
438 }
439 
440 static void
441 powernv_smp_probe_threads(platform_t plat)
442 {
443 	char buf[8];
444 	phandle_t cpu, dev, root;
445 	int res, nthreads;
446 
447 	root = OF_peer(0);
448 
449 	dev = OF_child(root);
450 	while (dev != 0) {
451 		res = OF_getprop(dev, "name", buf, sizeof(buf));
452 		if (res > 0 && strcmp(buf, "cpus") == 0)
453 			break;
454 		dev = OF_peer(dev);
455 	}
456 
457 	nthreads = 1;
458 	for (cpu = OF_child(dev); cpu != 0; cpu = OF_peer(cpu)) {
459 		res = OF_getprop(cpu, "device_type", buf, sizeof(buf));
460 		if (res <= 0 || strcmp(buf, "cpu") != 0)
461 			continue;
462 
463 		res = OF_getproplen(cpu, "ibm,ppc-interrupt-server#s");
464 
465 		if (res >= 0)
466 			nthreads = res / sizeof(cell_t);
467 		else
468 			nthreads = 1;
469 		break;
470 	}
471 
472 	smp_threads_per_core = nthreads;
473 	if (mp_ncpus % nthreads == 0)
474 		mp_ncores = mp_ncpus / nthreads;
475 }
476 
477 static struct cpu_group *
478 powernv_smp_topo(platform_t plat)
479 {
480 	if (mp_ncpus % smp_threads_per_core != 0) {
481 		printf("WARNING: Irregular SMP topology. Performance may be "
482 		     "suboptimal (%d threads, %d on first core)\n",
483 		     mp_ncpus, smp_threads_per_core);
484 		return (smp_topo_none());
485 	}
486 
487 	/* Don't do anything fancier for non-threaded SMP */
488 	if (smp_threads_per_core == 1)
489 		return (smp_topo_none());
490 
491 	return (smp_topo_1level(CG_SHARE_L1, smp_threads_per_core,
492 	    CG_FLAG_SMT));
493 }
494 
495 #endif
496 
497 static void
498 powernv_reset(platform_t platform)
499 {
500 
501 	opal_call(OPAL_CEC_REBOOT);
502 }
503 
504 static void
505 powernv_smp_ap_init(platform_t platform)
506 {
507 
508 	if (powernv_smp_ap_extra_init != NULL)
509 		powernv_smp_ap_extra_init();
510 }
511 
512 static void
513 powernv_cpu_idle(sbintime_t sbt)
514 {
515 }
516 
517 static int
518 powernv_node_numa_domain(platform_t platform, phandle_t node)
519 {
520 	/* XXX: Is locking necessary in here? */
521 	static int numa_domains[MAXMEMDOM];
522 	static int numa_max_domain;
523 	cell_t associativity[5];
524 	int i, res;
525 
526 #ifndef NUMA
527 	return (0);
528 #endif
529 	if (vm_ndomains == 1)
530 		return (0);
531 
532 	res = OF_getencprop(node, "ibm,associativity",
533 		associativity, sizeof(associativity));
534 
535 	/*
536 	 * If this node doesn't have associativity, or if there are not
537 	 * enough elements in it, check its parent.
538 	 */
539 	if (res < (int)(sizeof(cell_t) * (platform_associativity + 1))) {
540 		node = OF_parent(node);
541 		/* If already at the root, use default domain. */
542 		if (node == 0)
543 			return (0);
544 		return (powernv_node_numa_domain(platform, node));
545 	}
546 
547 	for (i = 0; i < numa_max_domain; i++) {
548 		if (numa_domains[i] == associativity[platform_associativity])
549 			return (i);
550 	}
551 	if (i < MAXMEMDOM)
552 		numa_domains[numa_max_domain++] =
553 		    associativity[platform_associativity];
554 	else
555 		i = 0;
556 
557 	return (i);
558 }
559 
560 /* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */
561 static void
562 powernv_setup_nmmu(void *unused)
563 {
564 	if (opal_check() != 0)
565 		return;
566 	opal_call(OPAL_NMMU_SET_PTCR, -1, mfspr(SPR_PTCR));
567 }
568 
569 SYSINIT(powernv_setup_nmmu, SI_SUB_CPU, SI_ORDER_ANY, powernv_setup_nmmu, NULL);
570