xref: /illumos-gate/usr/src/uts/sun4v/os/mach_startup.c (revision c9eab9d4e096bb9b983e9b007577edfa73c32eff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/machsystm.h>
28 #include <sys/archsystm.h>
29 #include <sys/prom_plat.h>
30 #include <sys/promif.h>
31 #include <sys/vm.h>
32 #include <sys/cpu.h>
33 #include <sys/bitset.h>
34 #include <sys/cpupart.h>
35 #include <sys/disp.h>
36 #include <sys/hypervisor_api.h>
37 #include <sys/traptrace.h>
38 #include <sys/modctl.h>
39 #include <sys/ldoms.h>
40 #include <sys/cpu_module.h>
41 #include <sys/mutex_impl.h>
42 #include <sys/rwlock.h>
43 #include <sys/sdt.h>
44 #include <sys/cmt.h>
45 #include <vm/vm_dep.h>
46 
/*
 * Hypervisor trap tracing is on by default only in TRAPTRACE builds.
 */
#if !defined(TRAPTRACE)
int mach_htraptrace_enable = 0;
#else
int mach_htraptrace_enable = 1;
#endif

/*
 * htrap_tr0 is the preallocated HV traptrace buffer for the boot cpu;
 * htrap_tr0_inuse is non-zero while that buffer is in use.
 */
extern char htrap_tr0[];
int htrap_tr0_inuse = 0;
54 
55 caddr_t	mmu_fault_status_area;
56 
57 extern void sfmmu_set_tsbs(void);
58 /*
59  * CPU IDLE optimization variables/routines
60  */
61 static int enable_halt_idle_cpus = 1;
62 
63 /*
64  * Defines for the idle_state_transition DTrace probe
65  *
66  * The probe fires when the CPU undergoes an idle state change (e.g. hv yield)
67  * The argument passed is the state to which the CPU is transitioning.
68  *
69  * The states are defined here.
70  */
#define	IDLE_STATE_NORMAL	0	/* fired after hv_cpu_yield() returns */
#define	IDLE_STATE_YIELDED	1	/* fired just before hv_cpu_yield() */

/*
 * sun4v defaults for the clock tick tunables.  startup_platform() applies
 * each one only when the corresponding tunable is still zero.
 */
#define	SUN4V_CLOCK_TICK_NCPUS		64
#define	SUN4V_CLOCK_TICK_THRESHOLD	64

extern int	clock_tick_ncpus;
extern int	clock_tick_threshold;
79 
80 void
81 setup_trap_table(void)
82 {
83 	caddr_t mmfsa_va;
84 	extern	 caddr_t mmu_fault_status_area;
85 	mmfsa_va =
86 	    mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id);
87 
88 	intr_init(CPU);		/* init interrupt request free list */
89 	setwstate(WSTATE_KERN);
90 	set_mmfsa_scratchpad(mmfsa_va);
91 	prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
92 	sfmmu_set_tsbs();
93 }
94 
/*
 * Intentionally empty in this file.  Presumably a notification hook for
 * changes to the installed physical memory list (inferred from the name
 * only; confirm against callers).
 */
void
phys_install_has_changed(void)
{
}
100 
101 /*
102  * Halt the present CPU until awoken via an interrupt
103  */
104 static void
105 cpu_halt(void)
106 {
107 	cpu_t *cpup = CPU;
108 	processorid_t cpu_sid = cpup->cpu_seqid;
109 	cpupart_t *cp = cpup->cpu_part;
110 	int hset_update = 1;
111 	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
112 	uint_t s;
113 
114 	/*
115 	 * If this CPU is online then we should notate our halting
116 	 * by adding ourselves to the partition's halted CPU
117 	 * bitset. This allows other CPUs to find/awaken us when
118 	 * work becomes available.
119 	 */
120 	if (CPU->cpu_flags & CPU_OFFLINE)
121 		hset_update = 0;
122 
123 	/*
124 	 * Add ourselves to the partition's halted CPUs bitset
125 	 * and set our HALTED flag, if necessary.
126 	 *
127 	 * When a thread becomes runnable, it is placed on the queue
128 	 * and then the halted cpu bitset is checked to determine who
129 	 * (if anyone) should be awoken. We therefore need to first
130 	 * add ourselves to the halted bitset, and then check if there
131 	 * is any work available.  The order is important to prevent a race
132 	 * that can lead to work languishing on a run queue somewhere while
133 	 * this CPU remains halted.
134 	 *
135 	 * Either the producing CPU will see we're halted and will awaken us,
136 	 * or this CPU will see the work available in disp_anywork()
137 	 */
138 	if (hset_update) {
139 		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
140 		membar_producer();
141 		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
142 	}
143 
144 	/*
145 	 * Check to make sure there's really nothing to do.
146 	 * Work destined for this CPU may become available after
147 	 * this check. We'll be notified through the clearing of our
148 	 * bit in the halted CPU bitset, and a poke.
149 	 */
150 	if (disp_anywork()) {
151 		if (hset_update) {
152 			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
153 			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
154 		}
155 		return;
156 	}
157 
158 	/*
159 	 * We're on our way to being halted.  Wait until something becomes
160 	 * runnable locally or we are awaken (i.e. removed from the halt set).
161 	 * Note that the call to hv_cpu_yield() can return even if we have
162 	 * nothing to do.
163 	 *
164 	 * Disable interrupts now, so that we'll awaken immediately
165 	 * after halting if someone tries to poke us between now and
166 	 * the time we actually halt.
167 	 *
168 	 * We check for the presence of our bit after disabling interrupts.
169 	 * If it's cleared, we'll return. If the bit is cleared after
170 	 * we check then the poke will pop us out of the halted state.
171 	 * Also, if the offlined CPU has been brought back on-line, then
172 	 * we return as well.
173 	 *
174 	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
175 	 * is important.
176 	 * cpu_wakeup() must clear, then poke.
177 	 * cpu_halt() must disable interrupts, then check for the bit.
178 	 *
179 	 * The check for anything locally runnable is here for performance
180 	 * and isn't needed for correctness. disp_nrunnable ought to be
181 	 * in our cache still, so it's inexpensive to check, and if there
182 	 * is anything runnable we won't have to wait for the poke.
183 	 *
184 	 */
185 	s = disable_vec_intr();
186 	while (*p == 0 &&
187 	    ((hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid)) ||
188 	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {
189 
190 		DTRACE_PROBE1(idle__state__transition,
191 		    uint_t, IDLE_STATE_YIELDED);
192 		(void) hv_cpu_yield();
193 		DTRACE_PROBE1(idle__state__transition,
194 		    uint_t, IDLE_STATE_NORMAL);
195 
196 		enable_vec_intr(s);
197 		s = disable_vec_intr();
198 	}
199 
200 	/*
201 	 * We're no longer halted
202 	 */
203 	enable_vec_intr(s);
204 	if (hset_update) {
205 		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
206 		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
207 	}
208 }
209 
210 /*
211  * If "cpu" is halted, then wake it up clearing its halted bit in advance.
212  * Otherwise, see if other CPUs in the cpu partition are halted and need to
213  * be woken up so that they can steal the thread we placed on this CPU.
214  * This function is only used on MP systems.
215  */
216 static void
217 cpu_wakeup(cpu_t *cpu, int bound)
218 {
219 	uint_t		cpu_found;
220 	processorid_t	cpu_sid;
221 	cpupart_t	*cp;
222 
223 	cp = cpu->cpu_part;
224 	cpu_sid = cpu->cpu_seqid;
225 	if (bitset_in_set(&cp->cp_haltset, cpu_sid)) {
226 		/*
227 		 * Clear the halted bit for that CPU since it will be
228 		 * poked in a moment.
229 		 */
230 		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
231 		/*
232 		 * We may find the current CPU present in the halted cpu bitset
233 		 * if we're in the context of an interrupt that occurred
234 		 * before we had a chance to clear our bit in cpu_halt().
235 		 * Poking ourself is obviously unnecessary, since if
236 		 * we're here, we're not halted.
237 		 */
238 		if (cpu != CPU)
239 			poke_cpu(cpu->cpu_id);
240 		return;
241 	} else {
242 		/*
243 		 * This cpu isn't halted, but it's idle or undergoing a
244 		 * context switch. No need to awaken anyone else.
245 		 */
246 		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
247 		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
248 			return;
249 	}
250 
251 	/*
252 	 * No need to wake up other CPUs if this is for a bound thread.
253 	 */
254 	if (bound)
255 		return;
256 
257 	/*
258 	 * The CPU specified for wakeup isn't currently halted, so check
259 	 * to see if there are any other halted CPUs in the partition,
260 	 * and if there are then awaken one.
261 	 */
262 	do {
263 		cpu_found = bitset_find(&cp->cp_haltset);
264 		if (cpu_found == (uint_t)-1)
265 			return;
266 	} while (bitset_atomic_test_and_del(&cp->cp_haltset, cpu_found) < 0);
267 
268 	if (cpu_found != CPU->cpu_seqid)
269 		poke_cpu(cpu_seq[cpu_found]->cpu_id);
270 }
271 
272 void
273 mach_cpu_halt_idle()
274 {
275 	if (enable_halt_idle_cpus) {
276 		idle_cpu = cpu_halt;
277 		disp_enq_thread = cpu_wakeup;
278 	}
279 }
280 
281 int
282 ndata_alloc_mmfsa(struct memlist *ndata)
283 {
284 	size_t	size;
285 
286 	size = MMFSA_SIZE * max_ncpus;
287 	mmu_fault_status_area = ndata_alloc(ndata, size, ecache_alignsize);
288 	if (mmu_fault_status_area == NULL)
289 		return (-1);
290 	return (0);
291 }
292 
/*
 * Memory scrubber hook.  sun4v has no memscrub support for now, so this
 * is deliberately a no-op.
 */
void
mach_memscrub(void)
{
}
298 
/*
 * fpras hook.  sun4v has no fpras support for now, so this is
 * deliberately a no-op.
 */
void
mach_fpras(void)
{
	/* no fpras support for sun4v for now */
}
304 
/*
 * Nothing to do here: on sun4v the HW copy limits are set by the
 * individual CPU module.
 */
void
mach_hw_copy_limit(void)
{
}
310 
311 /*
312  * We need to enable soft ring functionality on Niagara platforms since
313  * one strand can't handle interrupts for a 1Gb NIC. So set the tunable
314  * mac_soft_ring_enable by default on this platform.
315  * mac_soft_ring_enable variable is defined in space.c and used by MAC
316  * module. This tunable in concert with mac_soft_ring_count (declared
317  * in mac.h) will configure the number of fanout soft rings for a link.
318  */
319 extern boolean_t mac_soft_ring_enable;
320 void
321 startup_platform(void)
322 {
323 	mac_soft_ring_enable = B_TRUE;
324 	if (clock_tick_threshold == 0)
325 		clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD;
326 	if (clock_tick_ncpus == 0)
327 		clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS;
328 	/* set per-platform constants for mutex_backoff */
329 	mutex_backoff_base = 1;
330 	mutex_cap_factor = 4;
331 	if (l2_cache_node_count() > 1) {
332 		/* VF for example */
333 		mutex_backoff_base = 2;
334 		mutex_cap_factor = 64;
335 	}
336 	rw_lock_backoff = default_lock_backoff;
337 	rw_lock_delay = default_lock_delay;
338 }
339 
340 /*
341  * This function sets up hypervisor traptrace buffer
342  * This routine is called by the boot cpu only
343  */
344 void
345 mach_htraptrace_setup(int cpuid)
346 {
347 	TRAP_TRACE_CTL	*ctlp;
348 	int bootcpuid = getprocessorid(); /* invoked on boot cpu only */
349 
350 	if (mach_htraptrace_enable && ((cpuid != bootcpuid) ||
351 	    !htrap_tr0_inuse)) {
352 		ctlp = &trap_trace_ctl[cpuid];
353 		ctlp->d.hvaddr_base = (cpuid == bootcpuid) ? htrap_tr0 :
354 		    contig_mem_alloc_align(HTRAP_TSIZE, HTRAP_TSIZE);
355 		if (ctlp->d.hvaddr_base == NULL) {
356 			ctlp->d.hlimit = 0;
357 			ctlp->d.hpaddr_base = NULL;
358 			cmn_err(CE_WARN, "!cpu%d: failed to allocate HV "
359 			    "traptrace buffer", cpuid);
360 		} else {
361 			ctlp->d.hlimit = HTRAP_TSIZE;
362 			ctlp->d.hpaddr_base = va_to_pa(ctlp->d.hvaddr_base);
363 		}
364 	}
365 }
366 
367 /*
368  * This function enables or disables the hypervisor traptracing
369  */
370 void
371 mach_htraptrace_configure(int cpuid)
372 {
373 	uint64_t ret;
374 	uint64_t prev_buf, prev_bufsize;
375 	uint64_t prev_enable;
376 	uint64_t size;
377 	TRAP_TRACE_CTL	*ctlp;
378 
379 	ctlp = &trap_trace_ctl[cpuid];
380 	if (mach_htraptrace_enable) {
381 		if ((ctlp->d.hvaddr_base != NULL) &&
382 		    ((ctlp->d.hvaddr_base != htrap_tr0) ||
383 		    (!htrap_tr0_inuse))) {
384 			ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
385 			if ((ret == H_EOK) && (prev_bufsize != 0)) {
386 				cmn_err(CE_CONT,
387 				    "!cpu%d: previous HV traptrace buffer of "
388 				    "size 0x%lx at address 0x%lx", cpuid,
389 				    prev_bufsize, prev_buf);
390 			}
391 
392 			ret = hv_ttrace_buf_conf(ctlp->d.hpaddr_base,
393 			    ctlp->d.hlimit /
394 			    (sizeof (struct htrap_trace_record)), &size);
395 			if (ret == H_EOK) {
396 				ret = hv_ttrace_enable(\
397 				    (uint64_t)TRAP_TENABLE_ALL, &prev_enable);
398 				if (ret != H_EOK) {
399 					cmn_err(CE_WARN,
400 					    "!cpu%d: HV traptracing not "
401 					    "enabled, ta: 0x%x returned error: "
402 					    "%ld", cpuid, TTRACE_ENABLE, ret);
403 				} else {
404 					if (ctlp->d.hvaddr_base == htrap_tr0)
405 						htrap_tr0_inuse = 1;
406 				}
407 			} else {
408 				cmn_err(CE_WARN,
409 				    "!cpu%d: HV traptrace buffer not "
410 				    "configured, ta: 0x%x returned error: %ld",
411 				    cpuid, TTRACE_BUF_CONF, ret);
412 			}
413 			/*
414 			 * set hvaddr_base to NULL when traptrace buffer
415 			 * registration fails
416 			 */
417 			if (ret != H_EOK) {
418 				ctlp->d.hvaddr_base = NULL;
419 				ctlp->d.hlimit = 0;
420 				ctlp->d.hpaddr_base = NULL;
421 			}
422 		}
423 	} else {
424 		ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
425 		if ((ret == H_EOK) && (prev_bufsize != 0)) {
426 			ret = hv_ttrace_enable((uint64_t)TRAP_TDISABLE_ALL,
427 			    &prev_enable);
428 			if (ret == H_EOK) {
429 				if (ctlp->d.hvaddr_base == htrap_tr0)
430 					htrap_tr0_inuse = 0;
431 				ctlp->d.hvaddr_base = NULL;
432 				ctlp->d.hlimit = 0;
433 				ctlp->d.hpaddr_base = NULL;
434 			} else
435 				cmn_err(CE_WARN,
436 				    "!cpu%d: HV traptracing is not disabled, "
437 				    "ta: 0x%x returned error: %ld",
438 				    cpuid, TTRACE_ENABLE, ret);
439 		}
440 	}
441 }
442 
443 /*
444  * This function cleans up the hypervisor traptrace buffer
445  */
446 void
447 mach_htraptrace_cleanup(int cpuid)
448 {
449 	if (mach_htraptrace_enable) {
450 		TRAP_TRACE_CTL *ctlp;
451 		caddr_t httrace_buf_va;
452 
453 		ASSERT(cpuid < max_ncpus);
454 		ctlp = &trap_trace_ctl[cpuid];
455 		httrace_buf_va = ctlp->d.hvaddr_base;
456 		if (httrace_buf_va == htrap_tr0) {
457 			bzero(httrace_buf_va, HTRAP_TSIZE);
458 		} else if (httrace_buf_va != NULL) {
459 			contig_mem_free(httrace_buf_va, HTRAP_TSIZE);
460 		}
461 		ctlp->d.hvaddr_base = NULL;
462 		ctlp->d.hlimit = 0;
463 		ctlp->d.hpaddr_base = NULL;
464 	}
465 }
466 
467 /*
468  * Load any required machine class (sun4v) specific drivers.
469  */
470 void
471 load_mach_drivers(void)
472 {
473 	/*
474 	 * We don't want to load these LDOMs-specific
475 	 * modules if domaining is not supported.  Also,
476 	 * we must be able to run on non-LDOMs firmware.
477 	 */
478 	if (!domaining_supported())
479 		return;
480 
481 	/*
482 	 * Load the core domain services module
483 	 */
484 	if (modload("misc", "ds") == -1)
485 		cmn_err(CE_NOTE, "!'ds' module failed to load");
486 
487 	/*
488 	 * Load the rest of the domain services
489 	 */
490 	if (modload("misc", "fault_iso") == -1)
491 		cmn_err(CE_NOTE, "!'fault_iso' module failed to load");
492 
493 	if (modload("misc", "platsvc") == -1)
494 		cmn_err(CE_NOTE, "!'platsvc' module failed to load");
495 
496 	if (domaining_enabled() && modload("misc", "dr_cpu") == -1)
497 		cmn_err(CE_NOTE, "!'dr_cpu' module failed to load");
498 
499 	if (modload("misc", "dr_io") == -1)
500 		cmn_err(CE_NOTE, "!'dr_io' module failed to load");
501 
502 	/*
503 	 * Attempt to attach any virtual device servers. These
504 	 * drivers must be loaded at start of day so that they
505 	 * can respond to any updates to the machine description.
506 	 *
507 	 * Since it is quite likely that a domain will not support
508 	 * one or more of these servers, failures are ignored.
509 	 */
510 
511 	/* virtual disk server */
512 	(void) i_ddi_attach_hw_nodes("vds");
513 
514 	/* virtual network switch */
515 	(void) i_ddi_attach_hw_nodes("vsw");
516 
517 	/* virtual console concentrator */
518 	(void) i_ddi_attach_hw_nodes("vcc");
519 }
520