xref: /titanic_41/usr/src/uts/sun4u/os/mach_startup.c (revision ff0e937b36dcde1a47ff7b00aa76a491c0dc07a8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/machsystm.h>
27 #include <sys/archsystm.h>
28 #include <sys/vm.h>
29 #include <sys/cpu.h>
30 #include <sys/cpupart.h>
31 #include <sys/cmt.h>
32 #include <sys/bitset.h>
33 #include <sys/reboot.h>
34 #include <sys/kdi.h>
35 #include <sys/bootconf.h>
36 #include <sys/memlist_plat.h>
37 #include <sys/memlist_impl.h>
38 #include <sys/prom_plat.h>
39 #include <sys/prom_isa.h>
40 #include <sys/autoconf.h>
41 #include <sys/intreg.h>
42 #include <sys/ivintr.h>
43 #include <sys/fpu/fpusystm.h>
44 #include <sys/iommutsb.h>
45 #include <vm/vm_dep.h>
46 #include <vm/seg_kmem.h>
47 #include <vm/seg_kpm.h>
48 #include <vm/seg_map.h>
49 #include <vm/seg_kp.h>
50 #include <sys/sysconf.h>
51 #include <vm/hat_sfmmu.h>
52 #include <sys/kobj.h>
53 #include <sys/sun4asi.h>
54 #include <sys/clconf.h>
55 #include <sys/platform_module.h>
56 #include <sys/panic.h>
57 #include <sys/cpu_sgnblk_defs.h>
58 #include <sys/clock.h>
59 #include <sys/fpras_impl.h>
60 #include <sys/prom_debug.h>
61 #include <sys/traptrace.h>
62 #include <sys/memnode.h>
63 #include <sys/mem_cage.h>
64 
65 /*
66  * fpRAS implementation structures.
 *
 * fpras_chkfngrps_base holds the pointer returned by kmem_alloc() in
 * mach_fpras(); fpras_chkfngrps is that pointer rounded up to a 64-byte
 * boundary and has NCPU entries, one per possible cpu id.
 *
 * fpras_frequency and fpras_interval both start out as -1.  mach_fpras()
 * replaces a negative fpras_frequency with FPRAS_DEFAULT_FREQUENCY and
 * then stores a non-negative fpras_interval, which is what lets fpras
 * checks commence.
 *
 * NOTE(review): fpras_chkfnaddrs is not referenced in the visible part of
 * this file; confirm its users before changing it.
67  */
68 struct fpras_chkfn *fpras_chkfnaddrs[FPRAS_NCOPYOPS];
69 struct fpras_chkfngrp *fpras_chkfngrps;
70 struct fpras_chkfngrp *fpras_chkfngrps_base;
71 int fpras_frequency = -1;
72 int64_t fpras_interval = -1;
73 
74 /*
75  * Increase unix symbol table size as a workaround for bug 6828121.
 *
 * NOTE(review): nothing in the visible part of this file reads or writes
 * this variable; it appears to exist only for the symbol it contributes.
 * Confirm against the bug report before removing it.
76  */
77 int alloc_mem_bermuda_triangle;
78 
79 /*
80  * Halt idling cpus optimization
81  *
82  * This optimization is only enabled on platforms that have
83  * CPU halt support. The cpu_halt_cpu() routine is provided
84  * by the cpu module and it is referenced here with a pragma weak.
85  * The presence of this routine automatically enables the halt idling
86  * cpus functionality if the global switch enable_halt_idle_cpus
87  * is set (default is set); see mach_cpu_halt_idle().
88  *
89  */
90 #pragma weak	cpu_halt_cpu
91 extern void	cpu_halt_cpu();
92 
93 /*
94  * Defines for the idle_state_transition DTrace probe
95  *
96  * The probe fires when the CPU undergoes an idle state change (e.g. halting).
97  * The argument passed is the state to which the CPU is transitioning.
98  *
99  * The states are defined here.  cpu_halt() fires the probe with
 * IDLE_STATE_HALTED just before calling cpu_halt_cpu() and with
 * IDLE_STATE_NORMAL right after it returns.
100  */
101 #define	IDLE_STATE_NORMAL 0
102 #define	IDLE_STATE_HALTED 1
103 
/*
 * When non-zero (the default), mach_cpu_halt_idle() installs cpu_halt()
 * and cpu_wakeup(), provided the weak symbol cpu_halt_cpu is present.
 */
104 int		enable_halt_idle_cpus = 1; /* global switch */
105 
/*
 * Prepare trap handling for the current CPU: initialize its interrupt
 * request free list, call setwstate(WSTATE_KERN) and hand the address of
 * trap_table to prom_set_traptable().
 *
 * NOTE(review): the three calls are kept in their original order; confirm
 * that the order is not significant before rearranging them.
 */
106 void
107 setup_trap_table(void)
108 {
109 	intr_init(CPU);			/* init interrupt request free list */
110 	setwstate(WSTATE_KERN);
111 	prom_set_traptable(&trap_table);
112 }
113 
114 void
115 mach_fpras()
116 {
117 	if (fpras_implemented && !fpras_disable) {
118 		int i;
119 		struct fpras_chkfngrp *fcgp;
120 		size_t chkfngrpsallocsz;
121 
122 		/*
123 		 * Note that we size off of NCPU and setup for
124 		 * all those possibilities regardless of whether
125 		 * the cpu id is present or not.  We do this so that
126 		 * we don't have any construction or destruction
127 		 * activity to perform at DR time, and it's not
128 		 * costly in memory.  We require block alignment.
129 		 */
130 		chkfngrpsallocsz = NCPU * sizeof (struct fpras_chkfngrp);
131 		fpras_chkfngrps_base = kmem_alloc(chkfngrpsallocsz, KM_SLEEP);
132 		if (IS_P2ALIGNED((uintptr_t)fpras_chkfngrps_base, 64)) {
133 			fpras_chkfngrps = fpras_chkfngrps_base;
134 		} else {
135 			kmem_free(fpras_chkfngrps_base, chkfngrpsallocsz);
136 			chkfngrpsallocsz += 64;
137 			fpras_chkfngrps_base = kmem_alloc(chkfngrpsallocsz,
138 			    KM_SLEEP);
139 			fpras_chkfngrps = (struct fpras_chkfngrp *)
140 			    P2ROUNDUP((uintptr_t)fpras_chkfngrps_base, 64);
141 		}
142 
143 		/*
144 		 * Copy our check function into place for each copy operation
145 		 * and each cpu id.
146 		 */
147 		fcgp = &fpras_chkfngrps[0];
148 		for (i = 0; i < FPRAS_NCOPYOPS; ++i)
149 			bcopy((void *)fpras_chkfn_type1, &fcgp->fpras_fn[i],
150 			    sizeof (struct fpras_chkfn));
151 		for (i = 1; i < NCPU; ++i)
152 			*(&fpras_chkfngrps[i]) = *fcgp;
153 
154 		/*
155 		 * At definition fpras_frequency is set to -1, and it will
156 		 * still have that value unless changed in /etc/system (not
157 		 * strictly supported, but not preventable).  The following
158 		 * both sets the default and sanity checks anything from
159 		 * /etc/system.
160 		 */
161 		if (fpras_frequency < 0)
162 			fpras_frequency = FPRAS_DEFAULT_FREQUENCY;
163 
164 		/*
165 		 * Now calculate fpras_interval.  When fpras_interval
166 		 * becomes non-negative fpras checks will commence
167 		 * (copies before this point in boot will bypass fpras).
168 		 * Our stores of instructions must be visible; no need
169 		 * to flush as they're never been executed before.
170 		 */
171 		membar_producer();
172 		fpras_interval = (fpras_frequency == 0) ?
173 		    0 : sys_tick_freq / fpras_frequency;
174 	}
175 }
176 
177 void
178 mach_hw_copy_limit(void)
179 {
180 	if (!fpu_exists) {
181 		use_hw_bcopy = 0;
182 		hw_copy_limit_1 = 0;
183 		hw_copy_limit_2 = 0;
184 		hw_copy_limit_4 = 0;
185 		hw_copy_limit_8 = 0;
186 		use_hw_bzero = 0;
187 	}
188 }
189 
190 void
191 load_tod_module()
192 {
193 	/*
194 	 * Load tod driver module for the tod part found on this system.
195 	 * Recompute the cpu frequency/delays based on tod as tod part
196 	 * tends to keep time more accurately.
197 	 */
198 	if (tod_module_name == NULL || modload("tod", tod_module_name) == -1)
199 		halt("Can't load tod module");
200 }
201 
202 void
203 mach_memscrub(void)
204 {
205 	/*
206 	 * Startup memory scrubber, if not running fpu emulation code.
207 	 */
208 
209 #ifndef _HW_MEMSCRUB_SUPPORT
210 	if (fpu_exists) {
211 		if (memscrub_init()) {
212 			cmn_err(CE_WARN,
213 			    "Memory scrubber failed to initialize");
214 		}
215 	}
216 #endif /* _HW_MEMSCRUB_SUPPORT */
217 }
218 
219 /*
220  * Halt the present CPU until awoken via an interrupt.
221  * This routine should only be invoked if cpu_halt_cpu()
222  * exists and is supported, see mach_cpu_halt_idle()
 *
 * mach_cpu_halt_idle() installs this routine as idle_cpu.  It returns
 * without halting when disp_anywork() reports work; otherwise it loops
 * on cpu_halt_cpu(), re-checking the wakeup conditions each time with
 * interrupts disabled via disable_vec_intr().
223  */
224 void
225 cpu_halt(void)
226 {
227 	cpu_t *cpup = CPU;
228 	processorid_t cpu_sid = cpup->cpu_seqid;
229 	cpupart_t *cp = cpup->cpu_part;
230 	int hset_update = 1;
	/* volatile: disp_nrunnable is re-read on every pass of the loop below */
231 	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
	/* value returned by disable_vec_intr(), given back to enable_vec_intr() */
232 	uint_t s;
233 
234 	/*
235 	 * If this CPU is online then we should notate our halting
236 	 * by adding ourselves to the partition's halted CPU
237 	 * bitset. This allows other CPUs to find/awaken us when
238 	 * work becomes available.  An offline CPU skips the bitset.
239 	 */
240 	if (CPU->cpu_flags & CPU_OFFLINE)
241 		hset_update = 0;
242 
243 	/*
244 	 * Add ourselves to the partition's halted CPUs bitset
245 	 * and set our HALTED flag, if necessary.
246 	 *
247 	 * When a thread becomes runnable, it is placed on the queue
248 	 * and then the halted cpu bitset is checked to determine who
249 	 * (if anyone) should be awoken. We therefore need to first
250 	 * add ourselves to the halted bitset, and then check if there
251 	 * is any work available.  The order is important to prevent a race
252 	 * that can lead to work languishing on a run queue somewhere while
253 	 * this CPU remains halted.
254 	 *
255 	 * Either the producing CPU will see we're halted and will awaken us,
256 	 * or this CPU will see the work available in disp_anywork()
257 	 */
258 	if (hset_update) {
259 		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
260 		membar_producer();
261 		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
262 	}
263 
264 	/*
265 	 * Check to make sure there's really nothing to do.
266 	 * Work destined for this CPU may become available after
267 	 * this check. We'll be notified through the clearing of our
268 	 * bit in the halted CPU bitset, and a poke.
269 	 */
270 	if (disp_anywork()) {
		/* Undo the bookkeeping done above before going back to idle. */
271 		if (hset_update) {
272 			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
273 			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
274 		}
275 		return;
276 	}
277 
278 	/*
279 	 * We're on our way to being halted.  Wait until something becomes
280 	 * runnable locally or we are awakened (i.e. removed from the halt
281 	 * set).  Note that the call to cpu_halt_cpu() can return even if
282 	 * we have nothing to do.
283 	 *
284 	 * Disable interrupts now, so that we'll awaken immediately
285 	 * after halting if someone tries to poke us between now and
286 	 * the time we actually halt.
287 	 *
288 	 * We check for the presence of our bit after disabling interrupts.
289 	 * If it's cleared, we'll return. If the bit is cleared after
290 	 * we check then the poke will pop us out of the halted state.
291 	 * Also, if the offlined CPU has been brought back on-line, then
292 	 * we return as well.
293 	 *
294 	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
295 	 * is important.
296 	 * cpu_wakeup() must clear, then poke.
297 	 * cpu_halt() must disable interrupts, then check for the bit.
298 	 *
299 	 * The check for anything locally runnable is here for performance
300 	 * and isn't needed for correctness. disp_nrunnable ought to be
301 	 * in our cache still, so it's inexpensive to check, and if there
302 	 * is anything runnable we won't have to wait for the poke.
303 	 *
304 	 * Any interrupt will awaken the cpu from halt. Looping here
305 	 * will filter spurious interrupts that wake us up, but don't
306 	 * represent a need for us to head back out to idle().  This
307 	 * will enable the idle loop to be more efficient and sleep in
308 	 * the processor pipeline for a larger percent of the time,
309 	 * which returns useful cycles to the peer hardware strand
310 	 * that shares the pipeline.
311 	 */
312 	s = disable_vec_intr();
313 	while (*p == 0 &&
314 	    ((hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid)) ||
315 	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {
316 
317 		DTRACE_PROBE1(idle__state__transition,
318 		    uint_t, IDLE_STATE_HALTED);
319 		(void) cpu_halt_cpu();
320 		DTRACE_PROBE1(idle__state__transition,
321 		    uint_t, IDLE_STATE_NORMAL);
322 
		/*
		 * Open a short window with interrupts enabled, then
		 * disable them again before the loop condition is
		 * re-evaluated.  NOTE(review): presumably this lets the
		 * interrupt that ended the halt be taken - confirm.
		 */
323 		enable_vec_intr(s);
324 		s = disable_vec_intr();
325 	}
326 
327 	/*
328 	 * We're no longer halted
329 	 */
330 	enable_vec_intr(s);
331 	if (hset_update) {
332 		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
333 		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
334 	}
335 }
336 
337 /*
338  * If "cpu" is halted, then wake it up clearing its halted bit in advance.
339  * Otherwise, see if other CPUs in the cpu partition are halted and need to
340  * be woken up so that they can steal the thread we placed on this CPU.
341  * This function is only used on MP systems.
342  * This function should only be invoked if cpu_halt_cpu()
343  * exists and is supported, see mach_cpu_halt_idle()
344  */
345 static void
346 cpu_wakeup(cpu_t *cpu, int bound)
347 {
348 	uint_t		cpu_found;
349 	processorid_t	cpu_sid;
350 	cpupart_t	*cp;
351 
352 	cp = cpu->cpu_part;
353 	cpu_sid = cpu->cpu_seqid;
354 	if (bitset_in_set(&cp->cp_haltset, cpu_sid)) {
355 		/*
356 		 * Clear the halted bit for that CPU since it will be
357 		 * poked in a moment.
358 		 */
359 		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
360 		/*
361 		 * We may find the current CPU present in the halted cpu bitset
362 		 * if we're in the context of an interrupt that occurred
363 		 * before we had a chance to clear our bit in cpu_halt().
364 		 * Poking ourself is obviously unnecessary, since if
365 		 * we're here, we're not halted.
366 		 */
367 		if (cpu != CPU)
368 			poke_cpu(cpu->cpu_id);
369 		return;
370 	} else {
371 		/*
372 		 * This cpu isn't halted, but it's idle or undergoing a
373 		 * context switch. No need to awaken anyone else.
374 		 */
375 		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
376 		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
377 			return;
378 	}
379 
380 	/*
381 	 * No need to wake up other CPUs if this is for a bound thread.
382 	 */
383 	if (bound)
384 		return;
385 
386 	/*
387 	 * The CPU specified for wakeup isn't currently halted, so check
388 	 * to see if there are any other halted CPUs in the partition,
389 	 * and if there are then awaken one.
390 	 *
391 	 * If possible, try to select a CPU close to the target, since this
392 	 * will likely trigger a migration.
393 	 */
394 	do {
395 		cpu_found = bitset_find(&cp->cp_haltset);
396 		if (cpu_found == (uint_t)-1)
397 			return;
398 	} while (bitset_atomic_test_and_del(&cp->cp_haltset, cpu_found) < 0);
399 
400 	if (cpu_found != CPU->cpu_seqid)
401 		poke_cpu(cpu_seq[cpu_found]->cpu_id);
402 }
403 
404 void
405 mach_cpu_halt_idle(void)
406 {
407 	if (enable_halt_idle_cpus) {
408 		if (&cpu_halt_cpu) {
409 			idle_cpu = cpu_halt;
410 			disp_enq_thread = cpu_wakeup;
411 		}
412 	}
413 }
414 
/*
 * sun4u has no interrupt mondo queues, so there is nothing to set up.
 * The argument is ignored and 0 is always returned.
 */
/*ARGSUSED*/
int
cpu_intrq_setup(struct cpu *cp)
{
	return (0);
}
422 
/*
 * sun4u has no interrupt mondo queues, so there is nothing to tear down.
 * The argument is ignored.
 */
/*ARGSUSED*/
void
cpu_intrq_cleanup(struct cpu *cp)
{
}
429 
/*
 * sun4u has no interrupt/error queues, so there is nothing to register.
 * The argument is ignored.
 */
/*ARGSUSED*/
void
cpu_intrq_register(struct cpu *cp)
{
}
436 
/*
 * Setting up a hypervisor traptrace buffer does not apply to sun4u;
 * this is an empty placeholder and cpuid is ignored.
 */
/*ARGSUSED*/
void
mach_htraptrace_setup(int cpuid)
{
}
443 
/*
 * Enabling or disabling hypervisor traptracing does not apply to sun4u;
 * this is an empty placeholder and cpuid is ignored.
 */
/*ARGSUSED*/
void
mach_htraptrace_configure(int cpuid)
{
}
450 
/*
 * Cleaning up a hypervisor traptrace buffer does not apply to sun4u;
 * this is an empty placeholder and cpuid is ignored.
 */
/*ARGSUSED*/
void
mach_htraptrace_cleanup(int cpuid)
{
}
457 
/*
 * Startup-time initialization of the Machine Description framework.
 * That framework is sun4v only, so on sun4u this does nothing.
 */
void
mach_descrip_startup_init(void)
{
}
/*
 * Startup-time cleanup of the Machine Description framework.
 * That framework is sun4v only, so on sun4u this does nothing.
 */
void
mach_descrip_startup_fini(void)
{
}
474 
/*
 * Initialization of the Machine Description framework.
 * That framework is sun4v only, so on sun4u this does nothing.
 */
void
mach_descrip_init(void)
{
}
483 
/*
 * Hypervisor services setup does not apply to sun4u; empty placeholder.
 */
void
hsvc_setup(void)
{
}
489 
/*
 * There are currently no drivers specific to the sun4u machine class
 * to load, so this is an empty placeholder.
 */
void
load_mach_drivers(void)
{
}
495 
496 /*
497  * Return true if the machine we're running on is a Positron.
498  * (Positron is an unsupported developers platform.)
499  */
500 int
501 iam_positron(void)
502 {
503 	char model[32];
504 	const char proto_model[] = "SUNW,501-2732";
505 	pnode_t root = prom_rootnode();
506 
507 	if (prom_getproplen(root, "model") != sizeof (proto_model))
508 		return (0);
509 
510 	(void) prom_getprop(root, "model", model);
511 	if (strcmp(model, proto_model) == 0)
512 		return (1);
513 	return (0);
514 }
515 
516 /*
517  * Find a physically contiguous area of twice the largest ecache size
518  * to be used while doing displacement flush of ecaches.
519  */
520 uint64_t
521 ecache_flush_address(void)
522 {
523 	struct memlist *pmem;
524 	uint64_t flush_size;
525 	uint64_t ret_val;
526 
527 	flush_size = ecache_size * 2;
528 	for (pmem = phys_install; pmem; pmem = pmem->next) {
529 		ret_val = P2ROUNDUP(pmem->address, ecache_size);
530 		if (ret_val + flush_size <= pmem->address + pmem->size)
531 			return (ret_val);
532 	}
533 	return ((uint64_t)-1);
534 }
535 
536 /*
537  * Called with the memlist lock held to say that phys_install has
538  * changed.
539  */
540 void
541 phys_install_has_changed(void)
542 {
543 	/*
544 	 * Get the new address into a temporary just in case panicking
545 	 * involves use of ecache_flushaddr.
546 	 */
547 	uint64_t new_addr;
548 
549 	new_addr = ecache_flush_address();
550 	if (new_addr == (uint64_t)-1) {
551 		cmn_err(CE_PANIC,
552 		    "ecache_flush_address(): failed, ecache_size=%x",
553 		    ecache_size);
554 		/*NOTREACHED*/
555 	}
556 	ecache_flushaddr = new_addr;
557 	membar_producer();
558 }
559