/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/prom_plat.h>
#include <sys/promif.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/bitset.h>
#include <sys/cpupart.h>
#include <sys/disp.h>
#include <sys/hypervisor_api.h>
#include <sys/traptrace.h>
#include <sys/modctl.h>
#include <sys/ldoms.h>
#include <sys/cpu_module.h>
#include <sys/mutex_impl.h>
#include <sys/rwlock.h>
#include <sys/sdt.h>
#include <sys/cmt.h>
#include <vm/vm_dep.h>

#ifdef TRAPTRACE
int mach_htraptrace_enable = 1;
#else
int mach_htraptrace_enable = 0;
#endif
int htrap_tr0_inuse = 0;
extern char htrap_tr0[];	/* prealloc buf for boot cpu */

caddr_t	mmu_fault_status_area;

extern void sfmmu_set_tsbs(void);
/*
 * CPU IDLE optimization variables/routines
 */
static int enable_halt_idle_cpus = 1;
/*
 * Defines for the idle_state_transition DTrace probe
 *
 * The probe fires when the CPU undergoes an idle state change (e.g. hv yield).
 * The argument passed is the state to which the CPU is transitioning.
 *
 * The states are defined here.
 */
#define	IDLE_STATE_NORMAL	0
#define	IDLE_STATE_YIELDED	1
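
/*
 * The probe can be observed from userland with DTrace; for example
 * (illustrative one-liner only; probe availability may vary by release):
 *
 *	dtrace -n 'sdt:::idle-state-transition { trace(arg0); }'
 */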

#define	SUN4V_CLOCK_TICK_THRESHOLD	64
#define	SUN4V_CLOCK_TICK_NCPUS		64

extern int	clock_tick_threshold;
extern int	clock_tick_ncpus;

uint_t cp_haltset_fanout = 3;

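/*
 * Set up this CPU's trap handling state: initialize the interrupt
 * request free list, set the kernel window state, point the scratchpad
 * register at this CPU's MMU fault status area (MMFSA), hand the trap
 * table and MMFSA physical address to the PROM, and set up the TSBs.
 */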
void
setup_trap_table(void)
{
	caddr_t mmfsa_va;
	extern caddr_t mmu_fault_status_area;
	mmfsa_va =
	    mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id);

	intr_init(CPU);		/* init interrupt request free list */
	setwstate(WSTATE_KERN);
	set_mmfsa_scratchpad(mmfsa_va);
	prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
	sfmmu_set_tsbs();
}

void
phys_install_has_changed(void)
{
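	/* Empty stub: no machine-specific action is needed on sun4v. */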
}

/*
 * Halt the present CPU until awoken via an interrupt
 */
static void
cpu_halt(void)
{
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	int hset_update = 1;
	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
	uint_t s;

	/*
	 * If this CPU is online then we should note that we are halting
	 * by adding ourselves to the partition's halted CPU
	 * bitset. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (CPU->cpu_flags & CPU_OFFLINE)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitset
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted CPU bitset is checked to determine who
	 * (if anyone) should be awoken. We therefore need to first
	 * add ourselves to the halted bitset, and then check if there
	 * is any work available. The order is important to prevent a race
	 * that can lead to work languishing on a run queue somewhere while
	 * this CPU remains halted.
	 *
	 * Either the producing CPU will see we're halted and will awaken us,
	 * or this CPU will see the work available in disp_anywork().
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		membar_producer();
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitset, and a poke.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted.  Wait until something becomes
	 * runnable locally or we are awakened (i.e. removed from the halt
	 * set).  Note that the call to hv_cpu_yield() can return even if we
	 * have nothing to do.
	 *
	 * Disable interrupts now, so that we'll awaken immediately
	 * after halting if someone tries to poke us between now and
	 * the time we actually halt.
	 *
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check then the poke will pop us out of the halted state.
	 * Also, if the offlined CPU has been brought back on-line, then
	 * we return as well.
	 *
	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
	 * is important.
	 * cpu_wakeup() must clear, then poke.
	 * cpu_halt() must disable interrupts, then check for the bit.
	 *
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 *
	 * Any interrupt will awaken the CPU from halt. Looping here
	 * will filter spurious interrupts that wake us up, but don't
	 * represent a need for us to head back out to idle().  This
	 * will enable the idle loop to be more efficient and sleep in
	 * the processor pipeline for a larger percent of the time,
	 * which returns useful cycles to the peer hardware strand
	 * that shares the pipeline.
	 */
	s = disable_vec_intr();
	while (*p == 0 &&
	    ((hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid)) ||
	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {

		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_YIELDED);
		(void) hv_cpu_yield();
		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_NORMAL);

		enable_vec_intr(s);
		s = disable_vec_intr();
	}

	/*
	 * We're no longer halted
	 */
	enable_vec_intr(s);
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}
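
/*
 * Sketch of the halt/wakeup handshake implemented by cpu_halt() above
 * and cpu_wakeup() below (illustrative summary, not additional logic):
 *
 *	cpu_halt()				cpu_wakeup()
 *	----------				------------
 *	set CPU_DISP_HALTED			make thread runnable
 *	membar_producer()			...
 *	bitset_atomic_add(cp_haltset)		bitset_atomic_del(cp_haltset)
 *	bail out if disp_anywork()		poke_cpu()
 *	disable interrupts
 *	re-check cp_haltset bit, then
 *	hv_cpu_yield()
 *
 * Clearing the bit before poking, and re-checking the bit only after
 * interrupts are disabled, is what prevents a lost wakeup.
 */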

/*
 * If "cpu" is halted, then wake it up, clearing its halted bit in advance.
 * Otherwise, see if other CPUs in the cpu partition are halted and need to
 * be woken up so that they can steal the thread we placed on this CPU.
 * This function is only used on MP systems.
 */
static void
cpu_wakeup(cpu_t *cpu, int bound)
{
	uint_t cpu_found;
	processorid_t cpu_sid;
	cpupart_t *cp;

	cp = cpu->cpu_part;
	cpu_sid = cpu->cpu_seqid;
	if (bitset_in_set(&cp->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		/*
		 * We may find the current CPU present in the halted CPU bitset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_halt().
		 * Poking ourselves is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cpu != CPU)
			poke_cpu(cpu->cpu_id);
		return;
	} else {
		/*
		 * This CPU isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if this is for a bound thread.
	 */
	if (bound)
		return;

	/*
	 * The CPU specified for wakeup isn't currently halted, so check
	 * to see if there are any other halted CPUs in the partition,
	 * and if there are then awaken one.
	 */
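	/*
	 * bitset_atomic_test_and_del() fails (returns a negative value)
	 * if another CPU claimed the chosen candidate first, in which
	 * case we retry with the next halted CPU.
	 */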
	do {
		cpu_found = bitset_find(&cp->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;
	} while (bitset_atomic_test_and_del(&cp->cp_haltset, cpu_found) < 0);

	if (cpu_found != CPU->cpu_seqid)
		poke_cpu(cpu_seq[cpu_found]->cpu_id);
}

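/*
 * Install the halt-based idle loop and its matching wakeup hook.  If
 * enable_halt_idle_cpus is cleared, the existing idle and wakeup
 * handlers are left unchanged.
 */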
void
mach_cpu_halt_idle(void)
{
	if (enable_halt_idle_cpus) {
		idle_cpu = cpu_halt;
		disp_enq_thread = cpu_wakeup;
	}
}

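/*
 * Allocate the MMU fault status areas for all CPUs (MMFSA_SIZE bytes
 * per CPU) from the nucleus data area.  Returns 0 on success and -1 if
 * the allocation fails.
 */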
int
ndata_alloc_mmfsa(struct memlist *ndata)
{
	size_t	size;

	size = MMFSA_SIZE * max_ncpus;
	mmu_fault_status_area = ndata_alloc(ndata, size, ecache_alignsize);
	if (mmu_fault_status_area == NULL)
		return (-1);
	return (0);
}

void
mach_memscrub(void)
{
	/* no memscrub support for sun4v for now */
}

void
mach_fpras(void)
{
	/* no fpras support for sun4v for now */
}

void
mach_hw_copy_limit(void)
{
	/* HW copy limits set by individual CPU module */
}

/*
 * We need to enable soft ring functionality on Niagara platforms since
 * one strand can't handle interrupts for a 1Gb NIC, so set the tunable
 * mac_soft_ring_enable by default on this platform.
 * The mac_soft_ring_enable variable is defined in space.c and used by the
 * MAC module. This tunable, in concert with mac_soft_ring_count (declared
 * in mac.h), configures the number of fanout soft rings for a link.
 */
extern boolean_t mac_soft_ring_enable;
void
startup_platform(void)
{
	mac_soft_ring_enable = B_TRUE;
	if (clock_tick_threshold == 0)
		clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD;
	if (clock_tick_ncpus == 0)
		clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS;
	/* set per-platform constants for mutex_backoff */
	mutex_backoff_base = 1;
	mutex_cap_factor = 4;
	if (l2_cache_node_count() > 1) {
		/* VF for example */
		mutex_backoff_base = 2;
		mutex_cap_factor = 64;
	}
	rw_lock_backoff = default_lock_backoff;
	rw_lock_delay = default_lock_delay;
}

/*
 * This function sets up the hypervisor traptrace buffer.
 * It is invoked on the boot CPU only.
 */
void
mach_htraptrace_setup(int cpuid)
{
	TRAP_TRACE_CTL *ctlp;
	int bootcpuid = getprocessorid();	/* invoked on boot cpu only */

	if (mach_htraptrace_enable && ((cpuid != bootcpuid) ||
	    !htrap_tr0_inuse)) {
		ctlp = &trap_trace_ctl[cpuid];
		ctlp->d.hvaddr_base = (cpuid == bootcpuid) ? htrap_tr0 :
		    contig_mem_alloc_align(HTRAP_TSIZE, HTRAP_TSIZE);
		if (ctlp->d.hvaddr_base == NULL) {
			ctlp->d.hlimit = 0;
			ctlp->d.hpaddr_base = NULL;
			cmn_err(CE_WARN, "!cpu%d: failed to allocate HV "
			    "traptrace buffer", cpuid);
		} else {
			ctlp->d.hlimit = HTRAP_TSIZE;
			ctlp->d.hpaddr_base = va_to_pa(ctlp->d.hvaddr_base);
		}
	}
}

/*
 * This function enables or disables hypervisor trap tracing.
 */
void
mach_htraptrace_configure(int cpuid)
{
	uint64_t ret;
	uint64_t prev_buf, prev_bufsize;
	uint64_t prev_enable;
	uint64_t size;
	TRAP_TRACE_CTL *ctlp;

	ctlp = &trap_trace_ctl[cpuid];
	if (mach_htraptrace_enable) {
		if ((ctlp->d.hvaddr_base != NULL) &&
		    ((ctlp->d.hvaddr_base != htrap_tr0) ||
		    (!htrap_tr0_inuse))) {
			ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
			if ((ret == H_EOK) && (prev_bufsize != 0)) {
				cmn_err(CE_CONT,
				    "!cpu%d: previous HV traptrace buffer of "
				    "size 0x%lx at address 0x%lx", cpuid,
				    prev_bufsize, prev_buf);
			}

			ret = hv_ttrace_buf_conf(ctlp->d.hpaddr_base,
			    ctlp->d.hlimit /
			    (sizeof (struct htrap_trace_record)), &size);
			if (ret == H_EOK) {
				ret = hv_ttrace_enable(
				    (uint64_t)TRAP_TENABLE_ALL, &prev_enable);
				if (ret != H_EOK) {
					cmn_err(CE_WARN,
					    "!cpu%d: HV traptracing not "
					    "enabled, ta: 0x%x returned error: "
					    "%ld", cpuid, TTRACE_ENABLE, ret);
				} else {
					if (ctlp->d.hvaddr_base == htrap_tr0)
						htrap_tr0_inuse = 1;
				}
			} else {
				cmn_err(CE_WARN,
				    "!cpu%d: HV traptrace buffer not "
				    "configured, ta: 0x%x returned error: %ld",
				    cpuid, TTRACE_BUF_CONF, ret);
			}
			/*
			 * Set hvaddr_base to NULL when traptrace buffer
			 * registration fails.
			 */
			if (ret != H_EOK) {
				ctlp->d.hvaddr_base = NULL;
				ctlp->d.hlimit = 0;
				ctlp->d.hpaddr_base = NULL;
			}
		}
	} else {
		ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
		if ((ret == H_EOK) && (prev_bufsize != 0)) {
			ret = hv_ttrace_enable((uint64_t)TRAP_TDISABLE_ALL,
			    &prev_enable);
			if (ret == H_EOK) {
				if (ctlp->d.hvaddr_base == htrap_tr0)
					htrap_tr0_inuse = 0;
				ctlp->d.hvaddr_base = NULL;
				ctlp->d.hlimit = 0;
				ctlp->d.hpaddr_base = NULL;
			} else
				cmn_err(CE_WARN,
				    "!cpu%d: HV traptracing is not disabled, "
				    "ta: 0x%x returned error: %ld",
				    cpuid, TTRACE_ENABLE, ret);
		}
	}
}

/*
 * This function cleans up the hypervisor traptrace buffer.
 */
void
mach_htraptrace_cleanup(int cpuid)
{
	if (mach_htraptrace_enable) {
		TRAP_TRACE_CTL *ctlp;
		caddr_t httrace_buf_va;

		ASSERT(cpuid < max_ncpus);
		ctlp = &trap_trace_ctl[cpuid];
		httrace_buf_va = ctlp->d.hvaddr_base;
		if (httrace_buf_va == htrap_tr0) {
			bzero(httrace_buf_va, HTRAP_TSIZE);
		} else if (httrace_buf_va != NULL) {
			contig_mem_free(httrace_buf_va, HTRAP_TSIZE);
		}
		ctlp->d.hvaddr_base = NULL;
		ctlp->d.hlimit = 0;
		ctlp->d.hpaddr_base = NULL;
	}
}

/*
 * Load any required machine class (sun4v) specific drivers.
 */
void
load_mach_drivers(void)
{
	/*
	 * We don't want to load these LDOMs-specific
	 * modules if domaining is not supported.  Also,
	 * we must be able to run on non-LDOMs firmware.
	 */
	if (!domaining_supported())
		return;

	/*
	 * Load the core domain services module
	 */
	if (modload("misc", "ds") == -1)
		cmn_err(CE_NOTE, "!'ds' module failed to load");

	/*
	 * Load the rest of the domain services
	 */
	if (modload("misc", "fault_iso") == -1)
		cmn_err(CE_NOTE, "!'fault_iso' module failed to load");

	if (modload("misc", "platsvc") == -1)
		cmn_err(CE_NOTE, "!'platsvc' module failed to load");

	if (domaining_enabled() && modload("misc", "dr_cpu") == -1)
		cmn_err(CE_NOTE, "!'dr_cpu' module failed to load");

	if (modload("misc", "dr_io") == -1)
		cmn_err(CE_NOTE, "!'dr_io' module failed to load");

	if (modload("misc", "dr_mem") == -1)
		cmn_err(CE_NOTE, "!'dr_mem' module failed to load");

	/*
	 * Attempt to attach any virtual device servers. These
	 * drivers must be loaded at start of day so that they
	 * can respond to any updates to the machine description.
	 *
	 * Since it is quite likely that a domain will not support
	 * one or more of these servers, failures are ignored.
	 */

	/* virtual disk server */
	(void) i_ddi_attach_hw_nodes("vds");

	/* virtual network switch */
	(void) i_ddi_attach_hw_nodes("vsw");

	/* virtual console concentrator */
	(void) i_ddi_attach_hw_nodes("vcc");
}

void
set_platform_defaults(void)
{
	/*
	 * Allow at most one context domain per 8 CPUs, which is ample for
	 * good performance.  Do not make this too large, because it
	 * increases the space consumed in the per-process sfmmu structure.
	 */
	if (max_mmu_ctxdoms == 0)
		max_mmu_ctxdoms = (NCPU + 7) / 8;
}