xref: /titanic_52/usr/src/uts/sun4v/os/mach_startup.c (revision c1c61f44e88f4c8c155272ee56d868043146096a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/machsystm.h>
30 #include <sys/archsystm.h>
31 #include <sys/prom_plat.h>
32 #include <sys/promif.h>
33 #include <sys/vm.h>
34 #include <sys/cpu.h>
35 #include <sys/atomic.h>
36 #include <sys/cpupart.h>
37 #include <sys/disp.h>
38 #include <sys/hypervisor_api.h>
39 #include <sys/traptrace.h>
40 #include <sys/modctl.h>
41 #include <sys/ldoms.h>
42 #include <sys/cpu_module.h>
43 #include <sys/mutex_impl.h>
44 #include <sys/rwlock.h>
45 #include <vm/vm_dep.h>
46 #include <sys/sdt.h>
47 
48 #ifdef TRAPTRACE
49 int mach_htraptrace_enable = 1;
50 #else
51 int mach_htraptrace_enable = 0;
52 #endif
53 int htrap_tr0_inuse = 0;
54 extern char htrap_tr0[];	/* prealloc buf for boot cpu */
55 
56 caddr_t	mmu_fault_status_area;
57 
58 extern void sfmmu_set_tsbs(void);
59 /*
60  * CPU IDLE optimization variables/routines
61  */
62 static int enable_halt_idle_cpus = 1;
63 
64 /*
65  * Defines for the idle_state_transition DTrace probe
66  *
67  * The probe fires when the CPU undergoes an idle state change (e.g. hv yield)
68  * The agument passed is the state to which the CPU is transitioning.
69  *
70  * The states are defined here.
71  */
72 #define	IDLE_STATE_NORMAL 0
73 #define	IDLE_STATE_YIELDED 1
74 
75 #define	SUN4V_CLOCK_TICK_THRESHOLD	64
76 #define	SUN4V_CLOCK_TICK_NCPUS		64
77 
78 extern int	clock_tick_threshold;
79 extern int	clock_tick_ncpus;
80 
81 void
82 setup_trap_table(void)
83 {
84 	caddr_t mmfsa_va;
85 	extern	 caddr_t mmu_fault_status_area;
86 	mmfsa_va =
87 	    mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id);
88 
89 	intr_init(CPU);		/* init interrupt request free list */
90 	setwstate(WSTATE_KERN);
91 	set_mmfsa_scratchpad(mmfsa_va);
92 	prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
93 	sfmmu_set_tsbs();
94 }
95 
/*
 * No action is taken here; the body is empty.
 * NOTE(review): presumably a hook for changes to phys_install that needs
 * no platform work on sun4v -- confirm against callers.
 */
void
phys_install_has_changed(void)
{
}
101 
102 /*
103  * Halt the present CPU until awoken via an interrupt
104  */
105 static void
106 cpu_halt(void)
107 {
108 	cpu_t *cpup = CPU;
109 	processorid_t cpun = cpup->cpu_id;
110 	cpupart_t *cp = cpup->cpu_part;
111 	int hset_update = 1;
112 	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
113 	uint_t s;
114 
115 	/*
116 	 * If this CPU is online, and there's multiple CPUs
117 	 * in the system, then we should notate our halting
118 	 * by adding ourselves to the partition's halted CPU
119 	 * bitmap. This allows other CPUs to find/awaken us when
120 	 * work becomes available.
121 	 */
122 	if (CPU->cpu_flags & CPU_OFFLINE || ncpus == 1)
123 		hset_update = 0;
124 
125 	/*
126 	 * Add ourselves to the partition's halted CPUs bitmask
127 	 * and set our HALTED flag, if necessary.
128 	 *
129 	 * When a thread becomes runnable, it is placed on the queue
130 	 * and then the halted cpuset is checked to determine who
131 	 * (if anyone) should be awoken. We therefore need to first
132 	 * add ourselves to the halted cpuset, and then check if there
133 	 * is any work available.
134 	 */
135 	if (hset_update) {
136 		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
137 		membar_producer();
138 		CPUSET_ATOMIC_ADD(cp->cp_mach->mc_haltset, cpun);
139 	}
140 
141 	/*
142 	 * Check to make sure there's really nothing to do.
143 	 * Work destined for this CPU may become available after
144 	 * this check. We'll be notified through the clearing of our
145 	 * bit in the halted CPU bitmask, and a poke.
146 	 */
147 	if (disp_anywork()) {
148 		if (hset_update) {
149 			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
150 			CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun);
151 		}
152 		return;
153 	}
154 
155 	/*
156 	 * We're on our way to being halted.  Wait until something becomes
157 	 * runnable locally or we are awaken (i.e. removed from the halt set).
158 	 * Note that the call to hv_cpu_yield() can return even if we have
159 	 * nothing to do.
160 	 *
161 	 * Disable interrupts now, so that we'll awaken immediately
162 	 * after halting if someone tries to poke us between now and
163 	 * the time we actually halt.
164 	 *
165 	 * We check for the presence of our bit after disabling interrupts.
166 	 * If it's cleared, we'll return. If the bit is cleared after
167 	 * we check then the poke will pop us out of the halted state.
168 	 * Also, if the offlined CPU has been brought back on-line, then
169 	 * we return as well.
170 	 *
171 	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
172 	 * is important.
173 	 * cpu_wakeup() must clear, then poke.
174 	 * cpu_halt() must disable interrupts, then check for the bit.
175 	 *
176 	 * The check for anything locally runnable is here for performance
177 	 * and isn't needed for correctness. disp_nrunnable ought to be
178 	 * in our cache still, so it's inexpensive to check, and if there
179 	 * is anything runnable we won't have to wait for the poke.
180 	 *
181 	 */
182 	s = disable_vec_intr();
183 	while (*p == 0 &&
184 	    ((hset_update && CPU_IN_SET(cp->cp_mach->mc_haltset, cpun)) ||
185 	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {
186 
187 		DTRACE_PROBE1(idle__state__transition,
188 		    uint_t, IDLE_STATE_YIELDED);
189 		(void) hv_cpu_yield();
190 		DTRACE_PROBE1(idle__state__transition,
191 		    uint_t, IDLE_STATE_NORMAL);
192 
193 		enable_vec_intr(s);
194 		s = disable_vec_intr();
195 	}
196 
197 	/*
198 	 * We're no longer halted
199 	 */
200 	enable_vec_intr(s);
201 	if (hset_update) {
202 		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
203 		CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun);
204 	}
205 }
206 
207 /*
208  * If "cpu" is halted, then wake it up clearing its halted bit in advance.
209  * Otherwise, see if other CPUs in the cpu partition are halted and need to
210  * be woken up so that they can steal the thread we placed on this CPU.
211  * This function is only used on MP systems.
212  */
213 static void
214 cpu_wakeup(cpu_t *cpu, int bound)
215 {
216 	uint_t		cpu_found;
217 	int		result;
218 	cpupart_t	*cp;
219 
220 	cp = cpu->cpu_part;
221 	if (CPU_IN_SET(cp->cp_mach->mc_haltset, cpu->cpu_id)) {
222 		/*
223 		 * Clear the halted bit for that CPU since it will be
224 		 * poked in a moment.
225 		 */
226 		CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpu->cpu_id);
227 		/*
228 		 * We may find the current CPU present in the halted cpuset
229 		 * if we're in the context of an interrupt that occurred
230 		 * before we had a chance to clear our bit in cpu_halt().
231 		 * Poking ourself is obviously unnecessary, since if
232 		 * we're here, we're not halted.
233 		 */
234 		if (cpu != CPU)
235 			poke_cpu(cpu->cpu_id);
236 		return;
237 	} else {
238 		/*
239 		 * This cpu isn't halted, but it's idle or undergoing a
240 		 * context switch. No need to awaken anyone else.
241 		 */
242 		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
243 		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
244 			return;
245 	}
246 
247 	/*
248 	 * No need to wake up other CPUs if the thread we just enqueued
249 	 * is bound.
250 	 */
251 	if (bound)
252 		return;
253 
254 	/*
255 	 * See if there's any other halted CPUs. If there are, then
256 	 * select one, and awaken it.
257 	 * It's possible that after we find a CPU, somebody else
258 	 * will awaken it before we get the chance.
259 	 * In that case, look again.
260 	 */
261 	do {
262 		CPUSET_FIND(cp->cp_mach->mc_haltset, cpu_found);
263 		if (cpu_found == CPUSET_NOTINSET)
264 			return;
265 
266 		ASSERT(cpu_found >= 0 && cpu_found < NCPU);
267 		CPUSET_ATOMIC_XDEL(cp->cp_mach->mc_haltset, cpu_found, result);
268 	} while (result < 0);
269 
270 	if (cpu_found != CPU->cpu_id)
271 		poke_cpu(cpu_found);
272 }
273 
274 void
275 mach_cpu_halt_idle()
276 {
277 	if (enable_halt_idle_cpus) {
278 		idle_cpu = cpu_halt;
279 		disp_enq_thread = cpu_wakeup;
280 	}
281 }
282 
283 int
284 ndata_alloc_mmfsa(struct memlist *ndata)
285 {
286 	size_t	size;
287 
288 	size = MMFSA_SIZE * max_ncpus;
289 	mmu_fault_status_area = ndata_alloc(ndata, size, ecache_alignsize);
290 	if (mmu_fault_status_area == NULL)
291 		return (-1);
292 	return (0);
293 }
294 
/*
 * Memory scrubber hook.  Deliberately a no-op: sun4v has no memscrub
 * support for now.
 */
void
mach_memscrub(void)
{
}
300 
/*
 * fpras hook.  Deliberately a no-op: sun4v has no fpras support for now.
 */
void
mach_fpras(void)
{
}
306 
/*
 * Hardware copy limit hook.  Nothing to do here: the HW copy limits are
 * set by the individual CPU module.
 */
void
mach_hw_copy_limit(void)
{
}
312 
313 /*
314  * We need to enable soft ring functionality on Niagara platform since
315  * one strand can't handle interrupts for a 1Gb NIC. Set the tunable
316  * ip_squeue_soft_ring by default on this platform. We can also set
317  * ip_threads_per_cpu to track number of threads per core. The variables
318  * themselves are defined in space.c and used by IP module
319  */
320 extern uint_t ip_threads_per_cpu;
321 extern boolean_t ip_squeue_soft_ring;
322 void
323 startup_platform(void)
324 {
325 	ip_squeue_soft_ring = B_TRUE;
326 	if (clock_tick_threshold == 0)
327 		clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD;
328 	if (clock_tick_ncpus == 0)
329 		clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS;
330 	/* set per-platform constants for mutex_backoff */
331 	mutex_backoff_base = 1;
332 	mutex_cap_factor = 4;
333 	if (l2_cache_node_count() > 1) {
334 		/* VF for example */
335 		mutex_backoff_base = 2;
336 		mutex_cap_factor = 64;
337 	}
338 	rw_lock_backoff = default_lock_backoff;
339 	rw_lock_delay = default_lock_delay;
340 }
341 
342 /*
343  * This function sets up hypervisor traptrace buffer
344  * This routine is called by the boot cpu only
345  */
346 void
347 mach_htraptrace_setup(int cpuid)
348 {
349 	TRAP_TRACE_CTL	*ctlp;
350 	int bootcpuid = getprocessorid(); /* invoked on boot cpu only */
351 
352 	if (mach_htraptrace_enable && ((cpuid != bootcpuid) ||
353 	    !htrap_tr0_inuse)) {
354 		ctlp = &trap_trace_ctl[cpuid];
355 		ctlp->d.hvaddr_base = (cpuid == bootcpuid) ? htrap_tr0 :
356 		    contig_mem_alloc_align(HTRAP_TSIZE, HTRAP_TSIZE);
357 		if (ctlp->d.hvaddr_base == NULL) {
358 			ctlp->d.hlimit = 0;
359 			ctlp->d.hpaddr_base = NULL;
360 			cmn_err(CE_WARN, "!cpu%d: failed to allocate HV "
361 			    "traptrace buffer", cpuid);
362 		} else {
363 			ctlp->d.hlimit = HTRAP_TSIZE;
364 			ctlp->d.hpaddr_base = va_to_pa(ctlp->d.hvaddr_base);
365 		}
366 	}
367 }
368 
369 /*
370  * This function enables or disables the hypervisor traptracing
371  */
372 void
373 mach_htraptrace_configure(int cpuid)
374 {
375 	uint64_t ret;
376 	uint64_t prev_buf, prev_bufsize;
377 	uint64_t prev_enable;
378 	uint64_t size;
379 	TRAP_TRACE_CTL	*ctlp;
380 
381 	ctlp = &trap_trace_ctl[cpuid];
382 	if (mach_htraptrace_enable) {
383 		if ((ctlp->d.hvaddr_base != NULL) &&
384 		    ((ctlp->d.hvaddr_base != htrap_tr0) ||
385 		    (!htrap_tr0_inuse))) {
386 			ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
387 			if ((ret == H_EOK) && (prev_bufsize != 0)) {
388 				cmn_err(CE_CONT,
389 				    "!cpu%d: previous HV traptrace buffer of "
390 				    "size 0x%lx at address 0x%lx", cpuid,
391 				    prev_bufsize, prev_buf);
392 			}
393 
394 			ret = hv_ttrace_buf_conf(ctlp->d.hpaddr_base,
395 			    ctlp->d.hlimit /
396 			    (sizeof (struct htrap_trace_record)), &size);
397 			if (ret == H_EOK) {
398 				ret = hv_ttrace_enable(\
399 				    (uint64_t)TRAP_TENABLE_ALL, &prev_enable);
400 				if (ret != H_EOK) {
401 					cmn_err(CE_WARN,
402 					    "!cpu%d: HV traptracing not "
403 					    "enabled, ta: 0x%x returned error: "
404 					    "%ld", cpuid, TTRACE_ENABLE, ret);
405 				} else {
406 					if (ctlp->d.hvaddr_base == htrap_tr0)
407 						htrap_tr0_inuse = 1;
408 				}
409 			} else {
410 				cmn_err(CE_WARN,
411 				    "!cpu%d: HV traptrace buffer not "
412 				    "configured, ta: 0x%x returned error: %ld",
413 				    cpuid, TTRACE_BUF_CONF, ret);
414 			}
415 			/*
416 			 * set hvaddr_base to NULL when traptrace buffer
417 			 * registration fails
418 			 */
419 			if (ret != H_EOK) {
420 				ctlp->d.hvaddr_base = NULL;
421 				ctlp->d.hlimit = 0;
422 				ctlp->d.hpaddr_base = NULL;
423 			}
424 		}
425 	} else {
426 		ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
427 		if ((ret == H_EOK) && (prev_bufsize != 0)) {
428 			ret = hv_ttrace_enable((uint64_t)TRAP_TDISABLE_ALL,
429 			    &prev_enable);
430 			if (ret == H_EOK) {
431 				if (ctlp->d.hvaddr_base == htrap_tr0)
432 					htrap_tr0_inuse = 0;
433 				ctlp->d.hvaddr_base = NULL;
434 				ctlp->d.hlimit = 0;
435 				ctlp->d.hpaddr_base = NULL;
436 			} else
437 				cmn_err(CE_WARN,
438 				    "!cpu%d: HV traptracing is not disabled, "
439 				    "ta: 0x%x returned error: %ld",
440 				    cpuid, TTRACE_ENABLE, ret);
441 		}
442 	}
443 }
444 
445 /*
446  * This function cleans up the hypervisor traptrace buffer
447  */
448 void
449 mach_htraptrace_cleanup(int cpuid)
450 {
451 	if (mach_htraptrace_enable) {
452 		TRAP_TRACE_CTL *ctlp;
453 		caddr_t httrace_buf_va;
454 
455 		ASSERT(cpuid < max_ncpus);
456 		ctlp = &trap_trace_ctl[cpuid];
457 		httrace_buf_va = ctlp->d.hvaddr_base;
458 		if (httrace_buf_va == htrap_tr0) {
459 			bzero(httrace_buf_va, HTRAP_TSIZE);
460 		} else if (httrace_buf_va != NULL) {
461 			contig_mem_free(httrace_buf_va, HTRAP_TSIZE);
462 		}
463 		ctlp->d.hvaddr_base = NULL;
464 		ctlp->d.hlimit = 0;
465 		ctlp->d.hpaddr_base = NULL;
466 	}
467 }
468 
469 /*
470  * Load any required machine class (sun4v) specific drivers.
471  */
472 void
473 load_mach_drivers(void)
474 {
475 	/*
476 	 * We don't want to load these LDOMs-specific
477 	 * modules if domaining is not supported.  Also,
478 	 * we must be able to run on non-LDOMs firmware.
479 	 */
480 	if (!domaining_supported())
481 		return;
482 
483 	/*
484 	 * Load the core domain services module
485 	 */
486 	if (modload("misc", "ds") == -1)
487 		cmn_err(CE_NOTE, "!'ds' module failed to load");
488 
489 	/*
490 	 * Load the rest of the domain services
491 	 */
492 	if (modload("misc", "fault_iso") == -1)
493 		cmn_err(CE_NOTE, "!'fault_iso' module failed to load");
494 
495 	if (modload("misc", "platsvc") == -1)
496 		cmn_err(CE_NOTE, "!'platsvc' module failed to load");
497 
498 	if (domaining_enabled() && modload("misc", "dr_cpu") == -1)
499 		cmn_err(CE_NOTE, "!'dr_cpu' module failed to load");
500 
501 	/*
502 	 * Attempt to attach any virtual device servers. These
503 	 * drivers must be loaded at start of day so that they
504 	 * can respond to any updates to the machine description.
505 	 *
506 	 * Since it is quite likely that a domain will not support
507 	 * one or more of these servers, failures are ignored.
508 	 */
509 
510 	/* virtual disk server */
511 	(void) i_ddi_attach_hw_nodes("vds");
512 
513 	/* virtual network switch */
514 	(void) i_ddi_attach_hw_nodes("vsw");
515 
516 	/* virtual console concentrator */
517 	(void) i_ddi_attach_hw_nodes("vcc");
518 }
519