xref: /titanic_41/usr/src/uts/sun4v/os/mach_startup.c (revision a31148363f598def767ac48c5d82e1572e44b935)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/prom_plat.h>
#include <sys/promif.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/bitset.h>
#include <sys/cpupart.h>
#include <sys/disp.h>
#include <sys/hypervisor_api.h>
#include <sys/traptrace.h>
#include <sys/modctl.h>
#include <sys/ldoms.h>
#include <sys/cpu_module.h>
#include <sys/mutex_impl.h>
#include <sys/rwlock.h>
#include <sys/sdt.h>
#include <sys/cmt.h>
#include <vm/vm_dep.h>

#ifdef TRAPTRACE
int mach_htraptrace_enable = 1;
#else
int mach_htraptrace_enable = 0;
#endif
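/* set while the boot cpu's preallocated buffer (htrap_tr0) is registered with the HV */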
int htrap_tr0_inuse = 0;
extern char htrap_tr0[];	/* prealloc buf for boot cpu */

caddr_t	mmu_fault_status_area;

extern void sfmmu_set_tsbs(void);
/*
 * CPU IDLE optimization variables/routines
 */
static int enable_halt_idle_cpus = 1;

/*
 * Defines for the idle_state_transition DTrace probe
 *
 * The probe fires when the CPU undergoes an idle state change (e.g. hv yield).
 * The argument passed is the state to which the CPU is transitioning.
 *
 * The states are defined here.
 */
#define	IDLE_STATE_NORMAL 0
#define	IDLE_STATE_YIELDED 1

#define	SUN4V_CLOCK_TICK_THRESHOLD	64
#define	SUN4V_CLOCK_TICK_NCPUS		64

extern int	clock_tick_threshold;
extern int	clock_tick_ncpus;

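/*
 * Per-CPU trap setup: initialize the interrupt request free list, point
 * the MMFSA scratchpad register at this CPU's slot in the preallocated
 * MMU fault status area, register the trap table and MMFSA with the
 * firmware, and install the TSBs.
 */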
void
setup_trap_table(void)
{
	caddr_t mmfsa_va;
	extern caddr_t mmu_fault_status_area;
	mmfsa_va =
	    mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id);

	intr_init(CPU);		/* init interrupt request free list */
	setwstate(WSTATE_KERN);
	set_mmfsa_scratchpad(mmfsa_va);
	prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
	sfmmu_set_tsbs();
}

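/*
 * Nothing to do on sun4v when the physical memory configuration changes.
 */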
void
phys_install_has_changed(void)
{
}

/*
 * Halt the present CPU until awoken via an interrupt
 */
static void
cpu_halt(void)
{
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	int hset_update = 1;
	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
	uint_t s;

	/*
	 * If this CPU is online, note that we are halting by adding
	 * ourselves to the partition's halted CPU bitset. This allows
	 * other CPUs to find/awaken us when work becomes available.
	 */
	if (CPU->cpu_flags & CPU_OFFLINE)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitset
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpu bitset is checked to determine who
	 * (if anyone) should be awoken. We therefore need to first
	 * add ourselves to the halted bitset, and then check if there
	 * is any work available.  The order is important to prevent a race
	 * that can lead to work languishing on a run queue somewhere while
	 * this CPU remains halted.
	 *
	 * Either the producing CPU will see we're halted and will awaken us,
	 * or this CPU will see the work available in disp_anywork().
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		membar_producer();
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitset, and a poke.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted.  Wait until something becomes
	 * runnable locally or we are awakened (i.e. removed from the halt
	 * set).  Note that the call to hv_cpu_yield() can return even if
	 * we have nothing to do.
	 *
	 * Disable interrupts now, so that we'll awaken immediately
	 * after halting if someone tries to poke us between now and
	 * the time we actually halt.
	 *
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check, then the poke will pop us out of the halted state.
	 * Also, if the offlined CPU has been brought back on-line, then
	 * we return as well.
	 *
	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
	 * is important.
	 * cpu_wakeup() must clear, then poke.
	 * cpu_halt() must disable interrupts, then check for the bit.
	 *
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 *
	 * Any interrupt will awaken the cpu from halt. Looping here
	 * will filter spurious interrupts that wake us up, but don't
	 * represent a need for us to head back out to idle().  This
	 * will enable the idle loop to be more efficient and sleep in
	 * the processor pipeline for a larger percent of the time,
	 * which returns useful cycles to the peer hardware strand
	 * that shares the pipeline.
	 */
	s = disable_vec_intr();
	while (*p == 0 &&
	    ((hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid)) ||
	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {

		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_YIELDED);
		(void) hv_cpu_yield();
		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_NORMAL);

		enable_vec_intr(s);
		s = disable_vec_intr();
	}

	/*
	 * We're no longer halted
	 */
	enable_vec_intr(s);
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}

/*
 * If "cpu" is halted, then wake it up, clearing its halted bit in advance.
 * Otherwise, see if other CPUs in the cpu partition are halted and need to
 * be woken up so that they can steal the thread we placed on this CPU.
 * This function is only used on MP systems.
 */
static void
cpu_wakeup(cpu_t *cpu, int bound)
{
	uint_t		cpu_found;
	processorid_t	cpu_sid;
	cpupart_t	*cp;

	cp = cpu->cpu_part;
	cpu_sid = cpu->cpu_seqid;
	if (bitset_in_set(&cp->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		/*
		 * We may find the current CPU present in the halted cpu bitset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_halt().
		 * Poking ourselves is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cpu != CPU)
			poke_cpu(cpu->cpu_id);
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if this is for a bound thread.
	 */
	if (bound)
		return;

	/*
	 * The CPU specified for wakeup isn't currently halted, so check
	 * to see if there are any other halted CPUs in the partition,
	 * and if there are, awaken one.
	 */
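	/*
	 * bitset_atomic_test_and_del() fails if another CPU claims the
	 * chosen CPU first, so retry until we claim one or the halt set
	 * is empty.
	 */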
	do {
		cpu_found = bitset_find(&cp->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;
	} while (bitset_atomic_test_and_del(&cp->cp_haltset, cpu_found) < 0);

	if (cpu_found != CPU->cpu_seqid)
		poke_cpu(cpu_seq[cpu_found]->cpu_id);
}

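/*
 * Install cpu_halt()/cpu_wakeup() as the idle and dispatcher-enqueue
 * hooks so that idle CPUs yield to the hypervisor.
 */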
void
mach_cpu_halt_idle(void)
{
	if (enable_halt_idle_cpus) {
		idle_cpu = cpu_halt;
		disp_enq_thread = cpu_wakeup;
	}
}

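/*
 * Allocate the MMU fault status area for all possible CPUs from nucleus
 * data space.  Returns 0 on success, -1 if the allocation fails.
 */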
int
ndata_alloc_mmfsa(struct memlist *ndata)
{
	size_t	size;

	size = MMFSA_SIZE * max_ncpus;
	mmu_fault_status_area = ndata_alloc(ndata, size, ecache_alignsize);
	if (mmu_fault_status_area == NULL)
		return (-1);
	return (0);
}

void
mach_memscrub(void)
{
	/* no memscrub support for sun4v for now */
}

void
mach_fpras(void)
{
	/* no fpras support for sun4v for now */
}

void
mach_hw_copy_limit(void)
{
	/* HW copy limits set by individual CPU module */
}

/*
 * We need to enable soft ring functionality on Niagara platforms since
 * a single strand can't handle interrupts for a 1Gb NIC. So the tunable
 * mac_soft_ring_enable is set by default on this platform.
 * The mac_soft_ring_enable variable is defined in space.c and used by the
 * MAC module. This tunable, in concert with mac_soft_ring_count (declared
 * in mac.h), configures the number of fanout soft rings for a link.
 */
extern boolean_t mac_soft_ring_enable;
void
startup_platform(void)
{
	mac_soft_ring_enable = B_TRUE;
	if (clock_tick_threshold == 0)
		clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD;
	if (clock_tick_ncpus == 0)
		clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS;
	/* set per-platform constants for mutex_backoff */
	mutex_backoff_base = 1;
	mutex_cap_factor = 4;
	if (l2_cache_node_count() > 1) {
		/* VF for example */
		mutex_backoff_base = 2;
		mutex_cap_factor = 64;
	}
	rw_lock_backoff = default_lock_backoff;
	rw_lock_delay = default_lock_delay;
}

/*
 * This function sets up the hypervisor traptrace buffer.
 * This routine is called by the boot cpu only.
 */
void
mach_htraptrace_setup(int cpuid)
{
	TRAP_TRACE_CTL	*ctlp;
	int bootcpuid = getprocessorid(); /* invoked on boot cpu only */

	if (mach_htraptrace_enable && ((cpuid != bootcpuid) ||
	    !htrap_tr0_inuse)) {
		ctlp = &trap_trace_ctl[cpuid];
		ctlp->d.hvaddr_base = (cpuid == bootcpuid) ? htrap_tr0 :
		    contig_mem_alloc_align(HTRAP_TSIZE, HTRAP_TSIZE);
		if (ctlp->d.hvaddr_base == NULL) {
			ctlp->d.hlimit = 0;
			ctlp->d.hpaddr_base = NULL;
			cmn_err(CE_WARN, "!cpu%d: failed to allocate HV "
			    "traptrace buffer", cpuid);
		} else {
			ctlp->d.hlimit = HTRAP_TSIZE;
			ctlp->d.hpaddr_base = va_to_pa(ctlp->d.hvaddr_base);
		}
	}
}

/*
 * This function enables or disables hypervisor traptracing.
 */
void
mach_htraptrace_configure(int cpuid)
{
	uint64_t ret;
	uint64_t prev_buf, prev_bufsize;
	uint64_t prev_enable;
	uint64_t size;
	TRAP_TRACE_CTL	*ctlp;

	ctlp = &trap_trace_ctl[cpuid];
	if (mach_htraptrace_enable) {
		if ((ctlp->d.hvaddr_base != NULL) &&
		    ((ctlp->d.hvaddr_base != htrap_tr0) ||
		    (!htrap_tr0_inuse))) {
			ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
			if ((ret == H_EOK) && (prev_bufsize != 0)) {
				cmn_err(CE_CONT,
				    "!cpu%d: previous HV traptrace buffer of "
				    "size 0x%lx at address 0x%lx", cpuid,
				    prev_bufsize, prev_buf);
			}

			ret = hv_ttrace_buf_conf(ctlp->d.hpaddr_base,
			    ctlp->d.hlimit /
			    (sizeof (struct htrap_trace_record)), &size);
			if (ret == H_EOK) {
				ret = hv_ttrace_enable(
				    (uint64_t)TRAP_TENABLE_ALL, &prev_enable);
				if (ret != H_EOK) {
					cmn_err(CE_WARN,
					    "!cpu%d: HV traptracing not "
					    "enabled, ta: 0x%x returned error: "
					    "%ld", cpuid, TTRACE_ENABLE, ret);
				} else {
					if (ctlp->d.hvaddr_base == htrap_tr0)
						htrap_tr0_inuse = 1;
				}
			} else {
				cmn_err(CE_WARN,
				    "!cpu%d: HV traptrace buffer not "
				    "configured, ta: 0x%x returned error: %ld",
				    cpuid, TTRACE_BUF_CONF, ret);
			}
			/*
			 * set hvaddr_base to NULL when traptrace buffer
			 * registration fails
			 */
			if (ret != H_EOK) {
				ctlp->d.hvaddr_base = NULL;
				ctlp->d.hlimit = 0;
				ctlp->d.hpaddr_base = NULL;
			}
		}
	} else {
		ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
		if ((ret == H_EOK) && (prev_bufsize != 0)) {
			ret = hv_ttrace_enable((uint64_t)TRAP_TDISABLE_ALL,
			    &prev_enable);
			if (ret == H_EOK) {
				if (ctlp->d.hvaddr_base == htrap_tr0)
					htrap_tr0_inuse = 0;
				ctlp->d.hvaddr_base = NULL;
				ctlp->d.hlimit = 0;
				ctlp->d.hpaddr_base = NULL;
			} else
				cmn_err(CE_WARN,
				    "!cpu%d: HV traptracing is not disabled, "
				    "ta: 0x%x returned error: %ld",
				    cpuid, TTRACE_ENABLE, ret);
		}
	}
}

/*
 * This function cleans up the hypervisor traptrace buffer.
 */
void
mach_htraptrace_cleanup(int cpuid)
{
	if (mach_htraptrace_enable) {
		TRAP_TRACE_CTL *ctlp;
		caddr_t httrace_buf_va;

		ASSERT(cpuid < max_ncpus);
		ctlp = &trap_trace_ctl[cpuid];
		httrace_buf_va = ctlp->d.hvaddr_base;
		if (httrace_buf_va == htrap_tr0) {
			bzero(httrace_buf_va, HTRAP_TSIZE);
		} else if (httrace_buf_va != NULL) {
			contig_mem_free(httrace_buf_va, HTRAP_TSIZE);
		}
		ctlp->d.hvaddr_base = NULL;
		ctlp->d.hlimit = 0;
		ctlp->d.hpaddr_base = NULL;
	}
}

/*
 * Load any required machine class (sun4v) specific drivers.
 */
void
load_mach_drivers(void)
{
	/*
	 * We don't want to load these LDOMs-specific
	 * modules if domaining is not supported.  Also,
	 * we must be able to run on non-LDOMs firmware.
	 */
	if (!domaining_supported())
		return;

	/*
	 * Load the core domain services module
	 */
	if (modload("misc", "ds") == -1)
		cmn_err(CE_NOTE, "!'ds' module failed to load");

	/*
	 * Load the rest of the domain services
	 */
	if (modload("misc", "fault_iso") == -1)
		cmn_err(CE_NOTE, "!'fault_iso' module failed to load");

	if (modload("misc", "platsvc") == -1)
		cmn_err(CE_NOTE, "!'platsvc' module failed to load");

	if (domaining_enabled() && modload("misc", "dr_cpu") == -1)
		cmn_err(CE_NOTE, "!'dr_cpu' module failed to load");

	if (modload("misc", "dr_io") == -1)
		cmn_err(CE_NOTE, "!'dr_io' module failed to load");

	if (modload("misc", "dr_mem") == -1)
		cmn_err(CE_NOTE, "!'dr_mem' module failed to load");

	/*
	 * Attempt to attach any virtual device servers. These
	 * drivers must be loaded at start of day so that they
	 * can respond to any updates to the machine description.
	 *
	 * Since it is quite likely that a domain will not support
	 * one or more of these servers, failures are ignored.
	 */

	/* virtual disk server */
	(void) i_ddi_attach_hw_nodes("vds");

	/* virtual network switch */
	(void) i_ddi_attach_hw_nodes("vsw");

	/* virtual console concentrator */
	(void) i_ddi_attach_hw_nodes("vcc");
}

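/*
 * Machine-class (sun4v) defaults for tunables not already set elsewhere.
 */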
void
set_platform_defaults(void)
{
	/*
	 * Allow at most one context domain per 8 CPUs, which is ample for
	 * good performance.  Do not make this too large, because it
	 * increases the space consumed in the per-process sfmmu structure.
	 */
	if (max_mmu_ctxdoms == 0)
		max_mmu_ctxdoms = (NCPU + 7) / 8;
}