/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Peter Tribble.
 */

#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/cpupart.h>
#include <sys/cmt.h>
#include <sys/bitset.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/bootconf.h>
#include <sys/memlist_plat.h>
#include <sys/memlist_impl.h>
#include <sys/prom_plat.h>
#include <sys/prom_isa.h>
#include <sys/autoconf.h>
#include <sys/intreg.h>
#include <sys/ivintr.h>
#include <sys/fpu/fpusystm.h>
#include <sys/iommutsb.h>
#include <vm/vm_dep.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/seg_kp.h>
#include <sys/sysconf.h>
#include <vm/hat_sfmmu.h>
#include <sys/kobj.h>
#include <sys/sun4asi.h>
#include <sys/clconf.h>
#include <sys/platform_module.h>
#include <sys/panic.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/clock.h>
#include <sys/fpras_impl.h>
#include <sys/prom_debug.h>
#include <sys/traptrace.h>
#include <sys/memnode.h>
#include <sys/mem_cage.h>

/*
 * fpRAS implementation structures.
 */
struct fpras_chkfn *fpras_chkfnaddrs[FPRAS_NCOPYOPS];
struct fpras_chkfngrp *fpras_chkfngrps;
struct fpras_chkfngrp *fpras_chkfngrps_base;
int fpras_frequency = -1;
int64_t fpras_interval = -1;

/*
 * Increase unix symbol table size as a workaround for 6828121
 */
int alloc_mem_bermuda_triangle;

/*
 * Halt idling cpus optimization
 *
 * This optimization is only enabled on platforms that have
 * CPU halt support. The cpu_halt_cpu() support is provided
 * by the cpu module and is referenced here with a pragma weak.
 * The presence of this routine automatically enables the halt
 * idling cpus functionality if the global switch
 * enable_halt_idle_cpus is set (default is set).
 */
#pragma weak cpu_halt_cpu
extern void cpu_halt_cpu();
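
/*
 * Illustrative sketch (an explanatory gloss, not part of the original
 * comments): a weak symbol resolves to NULL when no strong definition
 * is linked in, so taking its address tests at runtime whether the cpu
 * module provides halt support. mach_cpu_halt_idle() below uses exactly
 * this idiom:
 *
 *	if (&cpu_halt_cpu != NULL)
 *		idle_cpu = cpu_halt;	// halt support is present
 */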

/*
 * Defines for the idle_state_transition DTrace probe
 *
 * The probe fires when the CPU undergoes an idle state change (e.g. halting).
 * The argument passed is the state to which the CPU is transitioning.
 *
 * The states are defined here.
 */
#define	IDLE_STATE_NORMAL	0
#define	IDLE_STATE_HALTED	1
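
/*
 * Hedged usage sketch: an SDT probe declared via DTRACE_PROBE1 as
 * idle__state__transition should surface in DTrace with the double
 * underscore collapsed to a hyphen. Assuming that translation, a
 * one-liner such as:
 *
 *	dtrace -n 'sdt:::idle-state-transition { @[arg0] = count(); }'
 *
 * would count transitions per state, where arg0 is IDLE_STATE_NORMAL
 * (0) or IDLE_STATE_HALTED (1).
 */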

int enable_halt_idle_cpus = 1;	/* global switch */

uint_t cp_haltset_fanout = 3;

void
setup_trap_table(void)
{
	intr_init(CPU);		/* init interrupt request free list */
	setwstate(WSTATE_KERN);
	prom_set_traptable(&trap_table);
}

void
mach_fpras()
{
	if (fpras_implemented && !fpras_disable) {
		int i;
		struct fpras_chkfngrp *fcgp;
		size_t chkfngrpsallocsz;

		/*
		 * Note that we size off of NCPU and setup for
		 * all those possibilities regardless of whether
		 * the cpu id is present or not. We do this so that
		 * we don't have any construction or destruction
		 * activity to perform at DR time, and it's not
		 * costly in memory. We require block alignment.
		 */
		chkfngrpsallocsz = NCPU * sizeof (struct fpras_chkfngrp);
		fpras_chkfngrps_base = kmem_alloc(chkfngrpsallocsz, KM_SLEEP);
		if (IS_P2ALIGNED((uintptr_t)fpras_chkfngrps_base, 64)) {
			fpras_chkfngrps = fpras_chkfngrps_base;
		} else {
			kmem_free(fpras_chkfngrps_base, chkfngrpsallocsz);
			chkfngrpsallocsz += 64;
			fpras_chkfngrps_base = kmem_alloc(chkfngrpsallocsz,
			    KM_SLEEP);
			fpras_chkfngrps = (struct fpras_chkfngrp *)
			    P2ROUNDUP((uintptr_t)fpras_chkfngrps_base, 64);
		}
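
		/*
		 * Worked example with a hypothetical address: had
		 * kmem_alloc() returned base 0x30000928, IS_P2ALIGNED(base,
		 * 64) would be false (0x928 & 0x3f != 0), so we reallocate
		 * with 64 bytes of slack and use P2ROUNDUP(0x30000928, 64)
		 * = 0x30000940, which still leaves room for all NCPU groups
		 * inside the enlarged buffer.
		 */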

		/*
		 * Copy our check function into place for each copy operation
		 * and each cpu id.
		 */
		fcgp = &fpras_chkfngrps[0];
		for (i = 0; i < FPRAS_NCOPYOPS; ++i)
			bcopy((void *)fpras_chkfn_type1, &fcgp->fpras_fn[i],
			    sizeof (struct fpras_chkfn));
		for (i = 1; i < NCPU; ++i)
			*(&fpras_chkfngrps[i]) = *fcgp;

		/*
		 * At definition fpras_frequency is set to -1, and it will
		 * still have that value unless changed in /etc/system (not
		 * strictly supported, but not preventable). The following
		 * both sets the default and sanity checks anything from
		 * /etc/system.
		 */
		if (fpras_frequency < 0)
			fpras_frequency = FPRAS_DEFAULT_FREQUENCY;

		/*
		 * Now calculate fpras_interval. When fpras_interval
		 * becomes non-negative fpras checks will commence
		 * (copies before this point in boot will bypass fpras).
		 * Our stores of instructions must be visible; no need
		 * to flush as they've never been executed before.
		 */
		membar_producer();
		fpras_interval = (fpras_frequency == 0) ?
		    0 : sys_tick_freq / fpras_frequency;
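
		/*
		 * Worked example with assumed values: on a system whose
		 * sys_tick_freq is 150000000 (a 150 MHz tick) and with
		 * fpras_frequency at a hypothetical default of 100, the
		 * interval works out to 1500000 ticks, i.e. a check
		 * window of roughly 10ms. The actual default comes from
		 * FPRAS_DEFAULT_FREQUENCY in fpras_impl.h.
		 */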
	}
}

void
mach_hw_copy_limit(void)
{
	if (!fpu_exists) {
		use_hw_bcopy = 0;
		hw_copy_limit_1 = 0;
		hw_copy_limit_2 = 0;
		hw_copy_limit_4 = 0;
		hw_copy_limit_8 = 0;
		use_hw_bzero = 0;
	}
}

void
load_tod_module()
{
	/*
	 * Load tod driver module for the tod part found on this system.
	 * Recompute the cpu frequency/delays based on tod as tod part
	 * tends to keep time more accurately.
	 */
	if (tod_module_name == NULL || modload("tod", tod_module_name) == -1)
		halt("Can't load tod module");
}

void
mach_memscrub(void)
{
	/*
	 * Startup memory scrubber, if not running fpu emulation code.
	 */

#ifndef _HW_MEMSCRUB_SUPPORT
	if (fpu_exists) {
		if (memscrub_init()) {
			cmn_err(CE_WARN,
			    "Memory scrubber failed to initialize");
		}
	}
#endif /* _HW_MEMSCRUB_SUPPORT */
}

/*
 * Halt the present CPU until awoken via an interrupt.
 * This routine should only be invoked if cpu_halt_cpu()
 * exists and is supported; see mach_cpu_halt_idle().
 */
void
cpu_halt(void)
{
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	int hset_update = 1;
	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
	uint_t s;

	/*
	 * If this CPU is online then we should notate our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitset. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (CPU->cpu_flags & CPU_OFFLINE)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitset
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpu bitset is checked to determine who
	 * (if anyone) should be awoken. We therefore need to first
	 * add ourselves to the halted bitset, and then check if there
	 * is any work available. The order is important to prevent a race
	 * that can lead to work languishing on a run queue somewhere while
	 * this CPU remains halted.
	 *
	 * Either the producing CPU will see we're halted and will awaken us,
	 * or this CPU will see the work available in disp_anywork().
	 */
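	/*
	 * Illustrative interleaving (an assumed schedule, not traced from
	 * a real run): had we checked disp_anywork() first and joined the
	 * halt set second, a producer could enqueue a thread and scan
	 * cp_haltset in the window between our two steps, find no halted
	 * CPUs, and skip the wakeup; the thread would then languish until
	 * something else poked us. Joining the bitset first, with
	 * membar_producer() ordering the store, closes that window.
	 */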
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		membar_producer();
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitset, and a poke.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted. Wait until something becomes
	 * runnable locally or we are awakened (i.e. removed from the halt
	 * set). Note that the call to cpu_halt_cpu() can return even if
	 * we have nothing to do.
	 *
	 * Disable interrupts now, so that we'll awaken immediately
	 * after halting if someone tries to poke us between now and
	 * the time we actually halt.
	 *
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check then the poke will pop us out of the halted state.
	 * Also, if the offlined CPU has been brought back on-line, then
	 * we return as well.
	 *
	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
	 * is important.
	 * cpu_wakeup() must clear, then poke.
	 * cpu_halt() must disable interrupts, then check for the bit.
	 *
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 *
	 * Any interrupt will awaken the cpu from halt. Looping here
	 * will filter spurious interrupts that wake us up, but don't
	 * represent a need for us to head back out to idle(). This
	 * will enable the idle loop to be more efficient and sleep in
	 * the processor pipeline for a larger percent of the time,
	 * which returns useful cycles to the peer hardware strand
	 * that shares the pipeline.
	 */
	s = disable_vec_intr();
	while (*p == 0 &&
	    ((hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid)) ||
	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {

		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_HALTED);
		(void) cpu_halt_cpu();
		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_NORMAL);

		enable_vec_intr(s);
		s = disable_vec_intr();
	}

	/*
	 * We're no longer halted
	 */
	enable_vec_intr(s);
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}

/*
 * If "cpu" is halted, then wake it up, clearing its halted bit in advance.
 * Otherwise, see if other CPUs in the cpu partition are halted and need to
 * be woken up so that they can steal the thread we placed on this CPU.
 * This function is only used on MP systems.
 * This function should only be invoked if cpu_halt_cpu()
 * exists and is supported; see mach_cpu_halt_idle().
 */
static void
cpu_wakeup(cpu_t *cpu, int bound)
{
	uint_t cpu_found;
	processorid_t cpu_sid;
	cpupart_t *cp;

	cp = cpu->cpu_part;
	cpu_sid = cpu->cpu_seqid;
	if (bitset_in_set(&cp->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		/*
		 * We may find the current CPU present in the halted cpu bitset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_halt().
		 * Poking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cpu != CPU)
			poke_cpu(cpu->cpu_id);
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if this is for a bound thread.
	 */
	if (bound)
		return;

	/*
	 * The CPU specified for wakeup isn't currently halted, so check
	 * to see if there are any other halted CPUs in the partition,
	 * and if there are then awaken one.
	 *
	 * If possible, try to select a CPU close to the target, since this
	 * will likely trigger a migration.
	 */
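	/*
	 * A note on the claim loop below (an explanatory gloss, not from
	 * the original comments): bitset_find() returns a candidate halted
	 * CPU, but another waker may race us to it, so we only own the
	 * wakeup once bitset_atomic_test_and_del() succeeds in clearing
	 * the bit; a negative return means we lost the race and must pick
	 * again.
	 */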
	do {
		cpu_found = bitset_find(&cp->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;
	} while (bitset_atomic_test_and_del(&cp->cp_haltset, cpu_found) < 0);

	if (cpu_found != CPU->cpu_seqid)
		poke_cpu(cpu_seq[cpu_found]->cpu_id);
}

void
mach_cpu_halt_idle(void)
{
	if (enable_halt_idle_cpus) {
		if (&cpu_halt_cpu) {
			idle_cpu = cpu_halt;
			disp_enq_thread = cpu_wakeup;
		}
	}
}

/*ARGSUSED*/
int
cpu_intrq_setup(struct cpu *cp)
{
	/* Interrupt mondo queues not applicable to sun4u */
	return (0);
}

/*ARGSUSED*/
void
cpu_intrq_cleanup(struct cpu *cp)
{
	/* Interrupt mondo queues not applicable to sun4u */
}

/*ARGSUSED*/
void
cpu_intrq_register(struct cpu *cp)
{
	/* Interrupt/error queues not applicable to sun4u */
}

/*ARGSUSED*/
void
mach_htraptrace_setup(int cpuid)
{
	/* Setup hypervisor traptrace buffer, not applicable to sun4u */
}

/*ARGSUSED*/
void
mach_htraptrace_configure(int cpuid)
{
	/* enable/disable hypervisor traptracing, not applicable to sun4u */
}

/*ARGSUSED*/
void
mach_htraptrace_cleanup(int cpuid)
{
	/* cleanup hypervisor traptrace buffer, not applicable to sun4u */
}

void
mach_descrip_startup_init(void)
{
	/*
	 * Only for sun4v.
	 * Initialize Machine description framework during startup.
	 */
}

void
mach_descrip_startup_fini(void)
{
	/*
	 * Only for sun4v.
	 * Clean up Machine Description framework during startup.
	 */
}

void
mach_descrip_init(void)
{
	/*
	 * Only for sun4v.
	 * Initialize Machine description framework.
	 */
}

void
hsvc_setup(void)
{
	/* Setup hypervisor services, not applicable to sun4u */
}

void
load_mach_drivers(void)
{
	/* Currently no machine class (sun4u) specific drivers to load */
}

/*
 * Find a physically contiguous area of twice the largest ecache size
 * to be used while doing displacement flush of ecaches.
 */
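/*
 * Worked example with assumed numbers: with an ecache_size of 0x800000
 * (8MB), flush_size is 0x1000000 (16MB). For a memlist segment spanning
 * [0x40000000, 0x42000000), P2ROUNDUP(0x40000000, 0x800000) is
 * 0x40000000 itself, and 0x40000000 + 0x1000000 <= 0x42000000, so that
 * address is returned; a segment too small or too misaligned to hold
 * 16MB after rounding is skipped.
 */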
uint64_t
ecache_flush_address(void)
{
	struct memlist *pmem;
	uint64_t flush_size;
	uint64_t ret_val;

	flush_size = ecache_size * 2;
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		ret_val = P2ROUNDUP(pmem->ml_address, ecache_size);
		if (ret_val + flush_size <= pmem->ml_address + pmem->ml_size)
			return (ret_val);
	}
	return ((uint64_t)-1);
}

/*
 * Called with the memlist lock held to say that phys_install has
 * changed.
 */
void
phys_install_has_changed(void)
{
	/*
	 * Get the new address into a temporary just in case panicking
	 * involves use of ecache_flushaddr.
	 */
	uint64_t new_addr;

	new_addr = ecache_flush_address();
	if (new_addr == (uint64_t)-1) {
		cmn_err(CE_PANIC,
		    "ecache_flush_address(): failed, ecache_size=%x",
		    ecache_size);
		/*NOTREACHED*/
	}
	ecache_flushaddr = new_addr;
	membar_producer();
}