1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/cpuvar.h>
26 #include <sys/systm.h>
27 #include <sys/sysmacros.h>
28 #include <sys/promif.h>
29 #include <sys/platform_module.h>
30 #include <sys/cmn_err.h>
31 #include <sys/errno.h>
32 #include <sys/machsystm.h>
33 #include <sys/bootconf.h>
34 #include <sys/nvpair.h>
35 #include <sys/kobj.h>
36 #include <sys/mem_cage.h>
37 #include <sys/opl.h>
38 #include <sys/scfd/scfostoescf.h>
39 #include <sys/cpu_sgnblk_defs.h>
40 #include <sys/utsname.h>
41 #include <sys/ddi.h>
42 #include <sys/sunndi.h>
43 #include <sys/lgrp.h>
44 #include <sys/memnode.h>
45 #include <sys/sysmacros.h>
46 #include <sys/time.h>
47 #include <sys/cpu.h>
48 #include <sys/dumphdr.h>
49 #include <vm/vm_dep.h>
50
51 int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
52 int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
53 int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
54 int (*opl_get_mem_addr)(char *unum, char *sid,
55 uint64_t offset, uint64_t *paddr);
56
57 /* Memory for fcode claims. 16k times # maximum possible IO units */
58 #define EFCODE_SIZE (OPL_MAX_BOARDS * OPL_MAX_IO_UNITS_PER_BOARD * 0x4000)
59 int efcode_size = EFCODE_SIZE;
60
61 #define OPL_MC_MEMBOARD_SHIFT 38 /* Boards on 256BG boundary */
62
63 /* Set the maximum number of boards for DR */
64 int opl_boards = OPL_MAX_BOARDS;
65
66 void sgn_update_all_cpus(ushort_t, uchar_t, uchar_t);
67
68 extern int tsb_lgrp_affinity;
69
70 int opl_tsb_spares = (OPL_MAX_BOARDS) * (OPL_MAX_PCICH_UNITS_PER_BOARD) *
71 (OPL_MAX_TSBS_PER_PCICH);
72
73 pgcnt_t opl_startup_cage_size = 0;
74
75 /*
76 * The length of the delay in seconds in communication with XSCF after
77 * which the warning message will be logged.
78 */
79 uint_t xscf_connect_delay = 60 * 15;
80
81 static opl_model_info_t opl_models[] = {
82 { "FF1", OPL_MAX_BOARDS_FF1, FF1, STD_DISPATCH_TABLE },
83 { "FF2", OPL_MAX_BOARDS_FF2, FF2, STD_DISPATCH_TABLE },
84 { "DC1", OPL_MAX_BOARDS_DC1, DC1, STD_DISPATCH_TABLE },
85 { "DC2", OPL_MAX_BOARDS_DC2, DC2, EXT_DISPATCH_TABLE },
86 { "DC3", OPL_MAX_BOARDS_DC3, DC3, EXT_DISPATCH_TABLE },
87 { "IKKAKU", OPL_MAX_BOARDS_IKKAKU, IKKAKU, STD_DISPATCH_TABLE },
88 };
89 static int opl_num_models = sizeof (opl_models)/sizeof (opl_model_info_t);
90
91 /*
92 * opl_cur_model
93 */
94 static opl_model_info_t *opl_cur_model = NULL;
95
96 static struct memlist *opl_memlist_per_board(struct memlist *ml);
97 static void post_xscf_msg(char *, int);
98 static void pass2xscf_thread();
99
100 /*
101 * Note FF/DC out-of-order instruction engine takes only a
102 * single cycle to execute each spin loop
103 * for comparison, Panther takes 6 cycles for same loop
104 * OPL_BOFF_SPIN = base spin loop, roughly one memory reference time
105 * OPL_BOFF_TM = approx nsec for OPL sleep instruction (1600 for OPL-C)
106 * OPL_BOFF_SLEEP = approx number of SPIN iterations to equal one sleep
107 * OPL_BOFF_MAX_SCALE - scaling factor for max backoff based on active cpus
108 * Listed values tuned for 2.15GHz to 2.64GHz systems
109 * Value may change for future systems
110 */
111 #define OPL_BOFF_SPIN 7
112 #define OPL_BOFF_SLEEP 4
113 #define OPL_BOFF_TM 1600
114 #define OPL_BOFF_MAX_SCALE 8
115
116 #define OPL_CLOCK_TICK_THRESHOLD 128
117 #define OPL_CLOCK_TICK_NCPUS 64
118
119 extern int clock_tick_threshold;
120 extern int clock_tick_ncpus;
121
122 int
set_platform_max_ncpus(void)123 set_platform_max_ncpus(void)
124 {
125 return (OPL_MAX_CPU_PER_BOARD * OPL_MAX_BOARDS);
126 }
127
128 int
set_platform_tsb_spares(void)129 set_platform_tsb_spares(void)
130 {
131 return (MIN(opl_tsb_spares, MAX_UPA));
132 }
133
134 static void
set_model_info()135 set_model_info()
136 {
137 extern int ts_dispatch_extended;
138 char name[MAXSYSNAME];
139 int i;
140
141 /*
142 * Get model name from the root node.
143 *
144 * We are using the prom device tree since, at this point,
145 * the Solaris device tree is not yet setup.
146 */
147 (void) prom_getprop(prom_rootnode(), "model", (caddr_t)name);
148
149 for (i = 0; i < opl_num_models; i++) {
150 if (strncmp(name, opl_models[i].model_name, MAXSYSNAME) == 0) {
151 opl_cur_model = &opl_models[i];
152 break;
153 }
154 }
155
156 /*
157 * If model not matched, it's an unknown model.
158 * Just return. It will default to standard dispatch tables.
159 */
160 if (i == opl_num_models)
161 return;
162
163 if ((opl_cur_model->model_cmds & EXT_DISPATCH_TABLE) &&
164 (ts_dispatch_extended == -1)) {
165 /*
166 * Based on a platform model, select a dispatch table.
167 * Only DC2 and DC3 systems uses the alternate/extended
168 * TS dispatch table.
169 * IKKAKU, FF1, FF2 and DC1 systems use standard dispatch
170 * tables.
171 */
172 ts_dispatch_extended = 1;
173 }
174
175 }
176
177 static void
set_max_mmu_ctxdoms()178 set_max_mmu_ctxdoms()
179 {
180 extern uint_t max_mmu_ctxdoms;
181 int max_boards;
182
183 /*
184 * From the model, get the maximum number of boards
185 * supported and set the value accordingly. If the model
186 * could not be determined or recognized, we assume the max value.
187 */
188 if (opl_cur_model == NULL)
189 max_boards = OPL_MAX_BOARDS;
190 else
191 max_boards = opl_cur_model->model_max_boards;
192
193 /*
194 * On OPL, cores and MMUs are one-to-one.
195 */
196 max_mmu_ctxdoms = OPL_MAX_CORE_UNITS_PER_BOARD * max_boards;
197 }
198
199 #pragma weak mmu_init_large_pages
200
/*
 * Establish OPL platform-wide defaults during early boot: CPU
 * signature hook, TOD driver, large-page MMU setup, TSB lgroup
 * affinity, MMU context-domain sizing, and the dump threshold.
 */
void
set_platform_defaults(void)
{
	extern char *tod_module_name;
	extern void cpu_sgn_update(ushort_t, uchar_t, uchar_t, int);
	extern void mmu_init_large_pages(size_t);

	/* Set the CPU signature function pointer */
	cpu_sgn_func = cpu_sgn_update;

	/* Set appropriate tod module for OPL platform */
	ASSERT(tod_module_name == NULL);
	tod_module_name = "todopl";

	/*
	 * mmu_init_large_pages is a weak symbol (see #pragma weak above);
	 * only call it when it was actually linked in.
	 */
	if ((mmu_page_sizes == max_mmu_page_sizes) &&
	    (mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
		if (&mmu_init_large_pages)
			mmu_init_large_pages(mmu_ism_pagesize);
	}

	tsb_lgrp_affinity = 1;

	set_max_mmu_ctxdoms();

	/* set OPL threshold for compressed dumps */
	dump_plat_mincpu_default = DUMP_PLAT_SUN4U_OPL_MINCPU;
}
228
229 /*
230 * Convert logical a board number to a physical one.
231 */
232
233 #define LSBPROP "board#"
234 #define PSBPROP "physical-board#"
235
/*
 * Return the physical board number for logical board "id", or -1 if
 * it cannot be determined from either the kernel or the OBP tree.
 */
int
opl_get_physical_board(int id)
{
	dev_info_t *root_dip, *dip = NULL;
	char *dname = NULL;
	int circ;

	pnode_t pnode;
	char pname[MAXSYSNAME] = {0};

	int lsb_id; /* Logical System Board ID */
	int psb_id; /* Physical System Board ID */


	/*
	 * This function is called on early stage of bootup when the
	 * kernel device tree is not initialized yet, and also
	 * later on when the device tree is up. We want to try
	 * the fast track first.
	 */
	root_dip = ddi_root_node();
	if (root_dip) {
		/* Get from devinfo node */
		ndi_devi_enter(root_dip, &circ);
		for (dip = ddi_get_child(root_dip); dip;
		    dip = ddi_get_next_sibling(dip)) {

			/* Only "pseudo-mc" nodes carry board# properties. */
			dname = ddi_node_name(dip);
			if (strncmp(dname, "pseudo-mc", 9) != 0)
				continue;

			if ((lsb_id = (int)ddi_getprop(DDI_DEV_T_ANY, dip,
			    DDI_PROP_DONTPASS, LSBPROP, -1)) == -1)
				continue;

			if (id == lsb_id) {
				/*
				 * Found the logical board; both exits must
				 * drop the ndi hold taken above.
				 */
				if ((psb_id = (int)ddi_getprop(DDI_DEV_T_ANY,
				    dip, DDI_PROP_DONTPASS, PSBPROP, -1))
				    == -1) {
					ndi_devi_exit(root_dip, circ);
					return (-1);
				} else {
					ndi_devi_exit(root_dip, circ);
					return (psb_id);
				}
			}
		}
		ndi_devi_exit(root_dip, circ);
	}

	/*
	 * We do not have the kernel device tree, or we did not
	 * find the node for some reason (let's say the kernel
	 * device tree was modified), let's try the OBP tree.
	 */
	pnode = prom_rootnode();
	for (pnode = prom_childnode(pnode); pnode;
	    pnode = prom_nextnode(pnode)) {

		if ((prom_getprop(pnode, "name", (caddr_t)pname) == -1) ||
		    (strncmp(pname, "pseudo-mc", 9) != 0))
			continue;

		if (prom_getprop(pnode, LSBPROP, (caddr_t)&lsb_id) == -1)
			continue;

		if (id == lsb_id) {
			if (prom_getprop(pnode, PSBPROP,
			    (caddr_t)&psb_id) == -1) {
				return (-1);
			} else {
				return (psb_id);
			}
		}
	}

	return (-1);
}
314
315 /*
316 * For OPL it's possible that memory from two or more successive boards
317 * will be contiguous across the boards, and therefore represented as a
318 * single chunk.
319 * This function splits such chunks down the board boundaries.
320 */
/*
 * Copy memlist "ml" into a newly allocated doubly-linked list in which
 * no entry crosses a 256GB (OPL_MC_MEMBOARD_SHIFT) board boundary.
 * Caller owns the returned list and must kmem_free each element.
 */
static struct memlist *
opl_memlist_per_board(struct memlist *ml)
{
	uint64_t ssize, low, high, boundary;
	struct memlist *head, *tail, *new;

	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);

	head = tail = NULL;

	for (; ml; ml = ml->ml_next) {
		low = (uint64_t)ml->ml_address;
		high = low+(uint64_t)(ml->ml_size);
		while (low < high) {
			/* clip the chunk at the next board boundary */
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			new = kmem_zalloc(sizeof (struct memlist), KM_SLEEP);
			new->ml_address = low;
			new->ml_size = boundary - low;
			if (head == NULL)
				head = new;
			if (tail) {
				tail->ml_next = new;
				new->ml_prev = tail;
			}
			tail = new;
			low = boundary;
		}
	}
	return (head);
}
352
353 void
set_platform_cage_params(void)354 set_platform_cage_params(void)
355 {
356 extern pgcnt_t total_pages;
357 extern struct memlist *phys_avail;
358 struct memlist *ml, *tml;
359
360 if (kernel_cage_enable) {
361 pgcnt_t preferred_cage_size;
362
363 preferred_cage_size = MAX(opl_startup_cage_size,
364 total_pages / 256);
365
366 ml = opl_memlist_per_board(phys_avail);
367
368 /*
369 * Note: we are assuming that post has load the
370 * whole show in to the high end of memory. Having
371 * taken this leap, we copy the whole of phys_avail
372 * the glist and arrange for the cage to grow
373 * downward (descending pfns).
374 */
375 kcage_range_init(ml, KCAGE_DOWN, preferred_cage_size);
376
377 /* free the memlist */
378 do {
379 tml = ml->ml_next;
380 kmem_free(ml, sizeof (struct memlist));
381 ml = tml;
382 } while (ml != NULL);
383 }
384
385 if (kcage_on)
386 cmn_err(CE_NOTE, "!DR Kernel Cage is ENABLED");
387 else
388 cmn_err(CE_NOTE, "!DR Kernel Cage is DISABLED");
389 }
390
391 /*ARGSUSED*/
392 int
plat_cpu_poweron(struct cpu * cp)393 plat_cpu_poweron(struct cpu *cp)
394 {
395 int (*opl_cpu_poweron)(struct cpu *) = NULL;
396
397 opl_cpu_poweron =
398 (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweron", 0);
399
400 if (opl_cpu_poweron == NULL)
401 return (ENOTSUP);
402 else
403 return ((opl_cpu_poweron)(cp));
404
405 }
406
407 /*ARGSUSED*/
408 int
plat_cpu_poweroff(struct cpu * cp)409 plat_cpu_poweroff(struct cpu *cp)
410 {
411 int (*opl_cpu_poweroff)(struct cpu *) = NULL;
412
413 opl_cpu_poweroff =
414 (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweroff", 0);
415
416 if (opl_cpu_poweroff == NULL)
417 return (ENOTSUP);
418 else
419 return ((opl_cpu_poweroff)(cp));
420
421 }
422
423 int
plat_max_boards(void)424 plat_max_boards(void)
425 {
426 /*
427 * If the model cannot be determined, default to the max value.
428 * Otherwise, Ikkaku model only supports 1 system board.
429 */
430 if ((opl_cur_model != NULL) && (opl_cur_model->model_type == IKKAKU))
431 return (OPL_MAX_BOARDS_IKKAKU);
432 else
433 return (OPL_MAX_BOARDS);
434 }
435
/* Maximum number of CPU units on one system board. */
int
plat_max_cpu_units_per_board(void)
{
	return (OPL_MAX_CPU_PER_BOARD);
}
441
/* Maximum number of memory units on one system board. */
int
plat_max_mem_units_per_board(void)
{
	return (OPL_MAX_MEM_UNITS_PER_BOARD);
}
447
/* Maximum number of I/O units on one system board. */
int
plat_max_io_units_per_board(void)
{
	return (OPL_MAX_IO_UNITS_PER_BOARD);
}
453
/* Maximum number of CMP (chip multiprocessor) units on one board. */
int
plat_max_cmp_units_per_board(void)
{
	return (OPL_MAX_CMP_UNITS_PER_BOARD);
}
459
/* Maximum number of cores on one system board. */
int
plat_max_core_units_per_board(void)
{
	return (OPL_MAX_CORE_UNITS_PER_BOARD);
}
465
/*
 * Map a pfn to its memnode; one memnode per 256GB board slice
 * (mem_node_pfn_shift is set in plat_build_mem_nodes()).
 */
int
plat_pfn_to_mem_node(pfn_t pfn)
{
	return (pfn >> mem_node_pfn_shift);
}
471
/* ARGSUSED */
/*
 * Carve the boot memory list into memnodes, one slice per 256GB
 * board boundary, and register each slice with the VM subsystem.
 */
void
plat_build_mem_nodes(prom_memlist_t *list, size_t nelems)
{
	size_t elem;
	pfn_t basepfn;
	pgcnt_t npgs;
	uint64_t boundary, ssize;
	uint64_t low, high;

	/*
	 * OPL mem slices are always aligned on a 256GB boundary.
	 */
	mem_node_pfn_shift = OPL_MC_MEMBOARD_SHIFT - MMU_PAGESHIFT;
	mem_node_physalign = 0;

	/*
	 * Boot install lists are arranged <addr, len>, <addr, len>, ...
	 */
	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);
	for (elem = 0; elem < nelems; list++, elem++) {
		low = list->addr;
		high = low + list->size;
		while (low < high) {
			/* clip each slice at the next board boundary */
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			basepfn = btop(low);
			npgs = btop(boundary - low);
			mem_node_add_slice(basepfn, basepfn + npgs - 1);
			low = boundary;
		}
	}
}
505
/*
 * Find the CPU associated with a slice at boot-time.
 *
 * Reads the "board#" (logical board) and "sb-mem-ranges" (base/size)
 * properties from the given mc PROM node and binds the board's lgroup
 * handle to the memnode containing its memory.
 */
void
plat_fill_mc(pnode_t nodeid)
{
	int board;
	int memnode;
	struct {
		uint64_t addr;
		uint64_t size;
	} mem_range;

	if (prom_getprop(nodeid, "board#", (caddr_t)&board) < 0) {
		panic("Can not find board# property in mc node %x", nodeid);
	}
	if (prom_getprop(nodeid, "sb-mem-ranges", (caddr_t)&mem_range) < 0) {
		panic("Can not find sb-mem-ranges property in mc node %x",
		    nodeid);
	}
	/* memnode == 256GB slice index of the board's base address */
	memnode = mem_range.addr >> OPL_MC_MEMBOARD_SHIFT;
	plat_assign_lgrphand_to_mem_node(board, memnode);
}
529
530 /*
531 * Return the platform handle for the lgroup containing the given CPU
532 *
533 * For OPL, lgroup platform handle == board #.
534 */
535
536 extern int mpo_disabled;
537 extern lgrp_handle_t lgrp_default_handle;
538
539 lgrp_handle_t
plat_lgrp_cpu_to_hand(processorid_t id)540 plat_lgrp_cpu_to_hand(processorid_t id)
541 {
542 lgrp_handle_t plathand;
543
544 /*
545 * Return the real platform handle for the CPU until
546 * such time as we know that MPO should be disabled.
547 * At that point, we set the "mpo_disabled" flag to true,
548 * and from that point on, return the default handle.
549 *
550 * By the time we know that MPO should be disabled, the
551 * first CPU will have already been added to a leaf
552 * lgroup, but that's ok. The common lgroup code will
553 * double check that the boot CPU is in the correct place,
554 * and in the case where mpo should be disabled, will move
555 * it to the root if necessary.
556 */
557 if (mpo_disabled) {
558 /* If MPO is disabled, return the default (UMA) handle */
559 plathand = lgrp_default_handle;
560 } else
561 plathand = (lgrp_handle_t)LSB_ID(id);
562 return (plathand);
563 }
564
565 /*
566 * Platform specific lgroup initialization
567 */
void
plat_lgrp_init(void)
{
	extern uint32_t lgrp_expand_proc_thresh;
	extern uint32_t lgrp_expand_proc_diff;
	const uint_t m = LGRP_LOADAVG_THREAD_MAX;

	/*
	 * Set tuneables for the OPL architecture
	 *
	 * lgrp_expand_proc_thresh is the threshold load on the set of
	 * lgroups a process is currently using on before considering
	 * adding another lgroup to the set. For Oly-C and Jupiter
	 * systems, there are four sockets per lgroup. Setting
	 * lgrp_expand_proc_thresh to add lgroups when the load reaches
	 * four threads will spread the load when it exceeds one thread
	 * per socket, optimizing memory bandwidth and L2 cache space.
	 *
	 * lgrp_expand_proc_diff determines how much less another lgroup
	 * must be loaded before shifting the start location of a thread
	 * to it.
	 *
	 * lgrp_loadavg_tolerance is the threshold where two lgroups are
	 * considered to have different loads. It is set to be less than
	 * 1% so that even a small residual load will be considered different
	 * from no residual load.
	 *
	 * We note loadavg values are not precise.
	 * Every 1/10 of a second loadavg values are reduced by 5%.
	 * This adjustment can come in the middle of the lgroup selection
	 * process, and for larger parallel apps with many threads can
	 * frequently occur between the start of the second thread
	 * placement and the finish of the last thread placement.
	 * We also must be careful to not use too small of a threshold
	 * since the cumulative decay for 1 second idle time is 40%.
	 * That is, the residual load from completed threads will still
	 * be 60% one second after the proc goes idle or 8% after 5 seconds.
	 *
	 * To allow for lag time in loadavg calculations
	 * remote thresh = 3.75 * LGRP_LOADAVG_THREAD_MAX
	 * local thresh = 0.75 * LGRP_LOADAVG_THREAD_MAX
	 * tolerance = 0.0078 * LGRP_LOADAVG_THREAD_MAX
	 *
	 * The load placement algorithms consider LGRP_LOADAVG_THREAD_MAX
	 * as the equivalent of a load of 1. To make the code more compact,
	 * we set m = LGRP_LOADAVG_THREAD_MAX.
	 */
	lgrp_expand_proc_thresh = (m * 3) + (m >> 1) + (m >> 2); /* 3.75m */
	lgrp_expand_proc_diff = (m >> 1) + (m >> 2); /* 0.75m */
	lgrp_loadavg_tolerance = (m >> 7); /* m/128, i.e. ~0.0078m */
}
619
620 /*
621 * Platform notification of lgroup (re)configuration changes
622 */
/*ARGSUSED*/
/*
 * Handle lgroup (re)configuration events for DR: memory add/delete
 * and board-to-board copy-rename.  No-op when MPO is disabled.
 */
void
plat_lgrp_config(lgrp_config_flag_t evt, uintptr_t arg)
{
	update_membounds_t *umb;
	lgrp_config_mem_rename_t lmr;
	int sbd, tbd;
	lgrp_handle_t hand, shand, thand;
	int mnode, snode, tnode;
	pfn_t start, end;

	if (mpo_disabled)
		return;

	switch (evt) {

	case LGRP_CONFIG_MEM_ADD:
		/*
		 * Establish the lgroup handle to memnode translation.
		 */
		umb = (update_membounds_t *)arg;

		hand = umb->u_board;
		mnode = plat_pfn_to_mem_node(umb->u_base >> MMU_PAGESHIFT);
		plat_assign_lgrphand_to_mem_node(hand, mnode);

		break;

	case LGRP_CONFIG_MEM_DEL:
		/*
		 * Special handling for possible memory holes.
		 */
		umb = (update_membounds_t *)arg;
		hand = umb->u_board;
		if ((mnode = plat_lgrphand_to_mem_node(hand)) != -1) {
			if (mem_node_config[mnode].exists) {
				start = mem_node_config[mnode].physbase;
				end = mem_node_config[mnode].physmax;
				mem_node_del_slice(start, end);
			}
		}

		break;

	case LGRP_CONFIG_MEM_RENAME:
		/*
		 * During a DR copy-rename operation, all of the memory
		 * on one board is moved to another board -- but the
		 * addresses/pfns and memnodes don't change. This means
		 * the memory has changed locations without changing identity.
		 *
		 * Source is where we are copying from and target is where we
		 * are copying to. After source memnode is copied to target
		 * memnode, the physical addresses of the target memnode are
		 * renamed to match what the source memnode had. Then target
		 * memnode can be removed and source memnode can take its
		 * place.
		 *
		 * To do this, swap the lgroup handle to memnode mappings for
		 * the boards, so target lgroup will have source memnode and
		 * source lgroup will have empty target memnode which is where
		 * its memory will go (if any is added to it later).
		 *
		 * Then source memnode needs to be removed from its lgroup
		 * and added to the target lgroup where the memory was living
		 * but under a different name/memnode. The memory was in the
		 * target memnode and now lives in the source memnode with
		 * different physical addresses even though it is the same
		 * memory.
		 */
		/* arg packs source board in low 16 bits, target in next 16 */
		sbd = arg & 0xffff;
		tbd = (arg & 0xffff0000) >> 16;
		shand = sbd;
		thand = tbd;
		snode = plat_lgrphand_to_mem_node(shand);
		tnode = plat_lgrphand_to_mem_node(thand);

		/*
		 * Special handling for possible memory holes.
		 */
		if (tnode != -1 && mem_node_config[tnode].exists) {
			start = mem_node_config[tnode].physbase;
			end = mem_node_config[tnode].physmax;
			mem_node_del_slice(start, end);
		}

		plat_assign_lgrphand_to_mem_node(thand, snode);
		plat_assign_lgrphand_to_mem_node(shand, tnode);

		lmr.lmem_rename_from = shand;
		lmr.lmem_rename_to = thand;

		/*
		 * Remove source memnode of copy rename from its lgroup
		 * and add it to its new target lgroup
		 */
		lgrp_config(LGRP_CONFIG_MEM_RENAME, (uintptr_t)snode,
		    (uintptr_t)&lmr);

		break;

	default:
		break;
	}
}
728
729 /*
730 * Return latency between "from" and "to" lgroups
731 *
732 * This latency number can only be used for relative comparison
733 * between lgroups on the running system, cannot be used across platforms,
734 * and may not reflect the actual latency. It is platform and implementation
735 * specific, so platform gets to decide its value. It would be nice if the
736 * number was at least proportional to make comparisons more meaningful though.
737 * NOTE: The numbers below are supposed to be load latencies for uncached
738 * memory divided by 10.
739 *
740 */
741 int
plat_lgrp_latency(lgrp_handle_t from,lgrp_handle_t to)742 plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
743 {
744 /*
745 * Return min remote latency when there are more than two lgroups
746 * (root and child) and getting latency between two different lgroups
747 * or root is involved
748 */
749 if (lgrp_optimizations() && (from != to ||
750 from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE))
751 return (42);
752 else
753 return (35);
754 }
755
756 /*
757 * Return platform handle for root lgroup
758 */
759 lgrp_handle_t
plat_lgrp_root_hand(void)760 plat_lgrp_root_hand(void)
761 {
762 if (mpo_disabled)
763 return (lgrp_default_handle);
764
765 return (LGRP_DEFAULT_HANDLE);
766 }
767
/*ARGSUSED*/
/* No per-memnode freelist processing is needed on OPL. */
void
plat_freelist_process(int mnode)
{
}
773
/* Attach the DR pseudo device; the only platform driver hookup needed. */
void
load_platform_drivers(void)
{
	(void) i_ddi_attach_pseudo_node("dr");
}
779
780 /*
781 * No platform drivers on this platform
782 */
783 char *platform_module_list[] = {
784 (char *)0
785 };
786
/*ARGSUSED*/
/* TOD faults require no platform-specific action on OPL. */
void
plat_tod_fault(enum tod_fault_type tod_bad)
{
}
792
793 /*ARGSUSED*/
794 void
cpu_sgn_update(ushort_t sgn,uchar_t state,uchar_t sub_state,int cpuid)795 cpu_sgn_update(ushort_t sgn, uchar_t state, uchar_t sub_state, int cpuid)
796 {
797 static void (*scf_panic_callback)(int);
798 static void (*scf_shutdown_callback)(int);
799
800 /*
801 * This is for notifing system panic/shutdown to SCF.
802 * In case of shutdown and panic, SCF call back
803 * function should be called.
804 * <SCF call back functions>
805 * scf_panic_callb() : panicsys()->panic_quiesce_hw()
806 * scf_shutdown_callb(): halt() or power_down() or reboot_machine()
807 * cpuid should be -1 and state should be SIGST_EXIT.
808 */
809 if (state == SIGST_EXIT && cpuid == -1) {
810
811 /*
812 * find the symbol for the SCF panic callback routine in driver
813 */
814 if (scf_panic_callback == NULL)
815 scf_panic_callback = (void (*)(int))
816 modgetsymvalue("scf_panic_callb", 0);
817 if (scf_shutdown_callback == NULL)
818 scf_shutdown_callback = (void (*)(int))
819 modgetsymvalue("scf_shutdown_callb", 0);
820
821 switch (sub_state) {
822 case SIGSUBST_PANIC:
823 if (scf_panic_callback == NULL) {
824 cmn_err(CE_NOTE, "!cpu_sgn_update: "
825 "scf_panic_callb not found\n");
826 return;
827 }
828 scf_panic_callback(SIGSUBST_PANIC);
829 break;
830
831 case SIGSUBST_HALT:
832 if (scf_shutdown_callback == NULL) {
833 cmn_err(CE_NOTE, "!cpu_sgn_update: "
834 "scf_shutdown_callb not found\n");
835 return;
836 }
837 scf_shutdown_callback(SIGSUBST_HALT);
838 break;
839
840 case SIGSUBST_ENVIRON:
841 if (scf_shutdown_callback == NULL) {
842 cmn_err(CE_NOTE, "!cpu_sgn_update: "
843 "scf_shutdown_callb not found\n");
844 return;
845 }
846 scf_shutdown_callback(SIGSUBST_ENVIRON);
847 break;
848
849 case SIGSUBST_REBOOT:
850 if (scf_shutdown_callback == NULL) {
851 cmn_err(CE_NOTE, "!cpu_sgn_update: "
852 "scf_shutdown_callb not found\n");
853 return;
854 }
855 scf_shutdown_callback(SIGSUBST_REBOOT);
856 break;
857 }
858 }
859 }
860
861 /*ARGSUSED*/
862 int
plat_get_mem_unum(int synd_code,uint64_t flt_addr,int flt_bus_id,int flt_in_memory,ushort_t flt_status,char * buf,int buflen,int * lenp)863 plat_get_mem_unum(int synd_code, uint64_t flt_addr, int flt_bus_id,
864 int flt_in_memory, ushort_t flt_status,
865 char *buf, int buflen, int *lenp)
866 {
867 /*
868 * check if it's a Memory error.
869 */
870 if (flt_in_memory) {
871 if (opl_get_mem_unum != NULL) {
872 return (opl_get_mem_unum(synd_code, flt_addr, buf,
873 buflen, lenp));
874 } else {
875 return (ENOTSUP);
876 }
877 } else {
878 return (ENOTSUP);
879 }
880 }
881
/*ARGSUSED*/
/*
 * Format the FRU name (unum) for the given CPU into buf; the layout
 * of the name depends on the platform model.  Returns 0 on success,
 * ENXIO/ENODEV/ENOSPC on failure.
 */
int
plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	int ret = 0;
	int sb;
	int plen;

	sb = opl_get_physical_board(LSB_ID(cpuid));
	if (sb == -1) {
		return (ENXIO);
	}

	/*
	 * opl_cur_model is assigned here
	 */
	if (opl_cur_model == NULL) {
		set_model_info();

		/*
		 * if not matched, return
		 */
		if (opl_cur_model == NULL)
			return (ENODEV);
	}

	/* opl_models[] is ordered so index == model_type */
	ASSERT((opl_cur_model - opl_models) == (opl_cur_model->model_type));

	switch (opl_cur_model->model_type) {
	case FF1:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_A",
		    CHIP_ID(cpuid) / 2);
		break;

	case FF2:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_B",
		    (CHIP_ID(cpuid) / 2) + (sb * 2));
		break;

	case DC1:
	case DC2:
	case DC3:
		plen = snprintf(buf, buflen, "/%s%02d/CPUM%d", "CMU", sb,
		    CHIP_ID(cpuid));
		break;

	case IKKAKU:
		plen = snprintf(buf, buflen, "/%s", "MBU_A");
		break;

	default:
		/* This should never happen */
		return (ENODEV);
	}

	if (plen >= buflen) {
		ret = ENOSPC;
	} else {
		if (lenp)
			*lenp = strlen(buf);
	}
	return (ret);
}
945
/*
 * Forward the system's utsname (nodename etc.) to the XSCF
 * asynchronously via post_xscf_msg().
 */
void
plat_nodename_set(void)
{
	post_xscf_msg((char *)&utsname, sizeof (struct utsname));
}
951
952 caddr_t efcode_vaddr = NULL;
953
954 /*
955 * Preallocate enough memory for fcode claims.
956 */
957
/*
 * Carve out efcode_size bytes at alloc_base (page aligned) for fcode
 * claims, recording the mapping in efcode_vaddr.  Returns the next
 * free address past the reservation; panics if BOP_ALLOC fails.
 */
caddr_t
efcode_alloc(caddr_t alloc_base)
{
	caddr_t efcode_alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
	    MMU_PAGESIZE);
	caddr_t vaddr;

	/*
	 * allocate the physical memory for the Oberon fcode.
	 */
	if ((vaddr = (caddr_t)BOP_ALLOC(bootops, efcode_alloc_base,
	    efcode_size, MMU_PAGESIZE)) == NULL)
		cmn_err(CE_PANIC, "Cannot allocate Efcode Memory");

	efcode_vaddr = vaddr;

	return (efcode_alloc_base + efcode_size);
}
976
977 caddr_t
plat_startup_memlist(caddr_t alloc_base)978 plat_startup_memlist(caddr_t alloc_base)
979 {
980 caddr_t tmp_alloc_base;
981
982 tmp_alloc_base = efcode_alloc(alloc_base);
983 tmp_alloc_base =
984 (caddr_t)roundup((uintptr_t)tmp_alloc_base, ecache_alignsize);
985 return (tmp_alloc_base);
986 }
987
988 /* need to forward declare these */
989 static void plat_lock_delay(uint_t);
990
/*
 * Late platform startup: install OPL clock-tick accounting tunables
 * (unless already overridden) and the adaptive-mutex backoff hooks.
 */
void
startup_platform(void)
{
	if (clock_tick_threshold == 0)
		clock_tick_threshold = OPL_CLOCK_TICK_THRESHOLD;
	if (clock_tick_ncpus == 0)
		clock_tick_ncpus = OPL_CLOCK_TICK_NCPUS;
	mutex_lock_delay = plat_lock_delay;
	mutex_cap_factor = OPL_BOFF_MAX_SCALE;
}
1001
/*
 * Compute a system-wide unique MMU index for the CPU from its
 * physical board, chip and core ids (cores and MMUs are one-to-one
 * on OPL).  Panics if the physical board cannot be resolved.
 */
static uint_t
get_mmu_id(processorid_t cpuid)
{
	int pb = opl_get_physical_board(LSB_ID(cpuid));

	if (pb == -1) {
		cmn_err(CE_PANIC,
		    "opl_get_physical_board failed (cpu %d LSB %u)",
		    cpuid, LSB_ID(cpuid));
	}
	return (pb * OPL_MAX_COREID_PER_BOARD) + (CHIP_ID(cpuid) *
	    OPL_MAX_COREID_PER_CMP) + CORE_ID(cpuid);
}
1015
/*
 * Fill in MMU context domain info for the CPU: its global MMU index
 * and the number of contexts supported.  Only Olympus-C and Jupiter
 * implementations are valid on this platform.
 */
void
plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
{
	int impl;

	impl = cpunodes[cpuid].implementation;
	if (IS_OLYMPUS_C(impl) || IS_JUPITER(impl)) {
		info->mmu_idx = get_mmu_id(cpuid);
		info->mmu_nctxs = 8192;		/* 8K (2^13) contexts */
	} else {
		cmn_err(CE_PANIC, "Unknown processor %d", impl);
	}
}
1029
1030 int
plat_get_mem_sid(char * unum,char * buf,int buflen,int * lenp)1031 plat_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
1032 {
1033 if (opl_get_mem_sid == NULL) {
1034 return (ENOTSUP);
1035 }
1036 return (opl_get_mem_sid(unum, buf, buflen, lenp));
1037 }
1038
1039 int
plat_get_mem_offset(uint64_t paddr,uint64_t * offp)1040 plat_get_mem_offset(uint64_t paddr, uint64_t *offp)
1041 {
1042 if (opl_get_mem_offset == NULL) {
1043 return (ENOTSUP);
1044 }
1045 return (opl_get_mem_offset(paddr, offp));
1046 }
1047
1048 int
plat_get_mem_addr(char * unum,char * sid,uint64_t offset,uint64_t * addrp)1049 plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
1050 {
1051 if (opl_get_mem_addr == NULL) {
1052 return (ENOTSUP);
1053 }
1054 return (opl_get_mem_addr(unum, sid, offset, addrp));
1055 }
1056
/*
 * Adaptive-mutex backoff delay (installed as mutex_lock_delay in
 * startup_platform()).  "backoff" is the requested delay in units of
 * OPL_BOFF_* calibrated iterations.
 */
void
plat_lock_delay(uint_t backoff)
{
	int i;
	uint_t cnt, remcnt;
	int ctr;
	hrtime_t delay_start, rem_delay;
	/*
	 * Platform specific lock delay code for OPL
	 *
	 * Using staged linear increases in the delay.
	 * The sleep instruction is the preferred method of delay,
	 * but is too large of granularity for the initial backoff.
	 */

	if (backoff < 100) {
		/*
		 * If desired backoff is long enough,
		 * use sleep for most of it
		 */
		for (cnt = backoff;
		    cnt >= OPL_BOFF_SLEEP;
		    cnt -= OPL_BOFF_SLEEP) {
			cpu_smt_pause();
		}
		/*
		 * spin for small remainder of backoff
		 */
		for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
			mutex_delay_default();
		}
	} else {
		/* backoff is large. Fill it by sleeping */
		delay_start = gethrtime_waitfree();
		cnt = backoff / OPL_BOFF_SLEEP;
		/*
		 * use sleep instructions for delay
		 */
		for (i = 0; i < cnt; i++) {
			cpu_smt_pause();
		}

		/*
		 * Note: if the other strand executes a sleep instruction,
		 * then the sleep ends immediately with a minimum time of
		 * 42 clocks. We check gethrtime to insure we have
		 * waited long enough. And we include both a short
		 * spin loop and a sleep for repeated delay times.
		 */

		rem_delay = gethrtime_waitfree() - delay_start;
		while (rem_delay < cnt * OPL_BOFF_TM) {
			/* top up with the estimated shortfall */
			remcnt = cnt - (rem_delay / OPL_BOFF_TM);
			for (i = 0; i < remcnt; i++) {
				cpu_smt_pause();
				for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
					mutex_delay_default();
				}
			}
			rem_delay = gethrtime_waitfree() - delay_start;
		}
	}
}
1120
/*
 * The following code implements asynchronous call to XSCF to setup the
 * domain node name.
 */

/* Release a message allocated by post_xscf_msg(); size must use NM_LEN. */
#define FREE_MSG(m)		kmem_free((m), NM_LEN((m)->len))

/*
 * The following three macros define all operations on the request
 * list we are using here, and hide the details of the list
 * implementation from the code.
 *
 * NOTE: callers are expected to hold ctl_msg.nm_lock around PUSH and
 * REMOVE (all call sites below do).  Arguments are evaluated more than
 * once -- pass only side-effect-free expressions.
 */
#define	PUSH(m)		\
	{ \
		(m)->next = ctl_msg.head; \
		(m)->prev = NULL; \
		if ((m)->next != NULL) \
			(m)->next->prev = (m); \
		ctl_msg.head = (m); \
	}

#define	REMOVE(m)	\
	{ \
		if ((m)->prev != NULL) \
			(m)->prev->next = (m)->next; \
		else \
			ctl_msg.head = (m)->next; \
		if ((m)->next != NULL) \
			(m)->next->prev = (m)->prev; \
	}

/* Free every message after (head), detaching them from the list first. */
#define	FREE_THE_TAIL(head)	\
	{ \
		nm_msg_t *n_msg, *m; \
		m = (head)->next; \
		(head)->next = NULL; \
		while (m != NULL) { \
			n_msg = m->next; \
			FREE_MSG(m); \
			m = n_msg; \
		} \
	}

/* Wrap an SCF "putinfo" call with the fixed key/type arguments. */
#define	SCF_PUTINFO(f, s, p)	\
	f(KEY_ESCF, 0x01, 0, s, p)

/* Send message (m) to the XSCF; evaluates to nonzero on success, r = rc. */
#define	PASS2XSCF(m, r)	((r = SCF_PUTINFO(ctl_msg.scf_service_function, \
					    (m)->len, (m)->data)) == 0)

/*
 * The value of the following macro loosely depends on the
 * value of the "device busy" timeout used in the SCF driver.
 * (See pass2xscf_thread()).
 */
#define	SCF_DEVBUSY_DELAY	10

/*
 * The default number of attempts to contact the scf driver
 * if we cannot fetch any information about the timeout value
 * it uses.
 */

#define	REPEATS		4

typedef struct nm_msg {
	struct nm_msg *next;	/* doubly-linked request list */
	struct nm_msg *prev;
	int len;		/* number of valid bytes in data[] */
	char data[1];		/* variable-length payload; see NM_LEN */
} nm_msg_t;

/* Allocation size for a message carrying (len) payload bytes. */
#define	NM_LEN(len)		(sizeof (nm_msg_t) + (len) - 1)

/* Shared state for the XSCF node-name pipeline; guarded by nm_lock. */
static struct ctlmsg {
	nm_msg_t	*head;		/* pending requests (newest first) */
	nm_msg_t	*now_serving;	/* request currently being sent */
	kmutex_t	nm_lock;	/* protects this whole struct */
	kthread_t	*nmt;		/* worker thread, NULL if not running */
	int		cnt;		/* number of queued requests */
	int	(*scf_service_function)(uint32_t, uint8_t,
				    uint32_t, uint32_t, void *);
} ctl_msg;
1203
1204 static void
post_xscf_msg(char * dp,int len)1205 post_xscf_msg(char *dp, int len)
1206 {
1207 nm_msg_t *msg;
1208
1209 msg = (nm_msg_t *)kmem_zalloc(NM_LEN(len), KM_SLEEP);
1210
1211 bcopy(dp, msg->data, len);
1212 msg->len = len;
1213
1214 mutex_enter(&ctl_msg.nm_lock);
1215 if (ctl_msg.nmt == NULL) {
1216 ctl_msg.nmt = thread_create(NULL, 0, pass2xscf_thread,
1217 NULL, 0, &p0, TS_RUN, minclsyspri);
1218 }
1219
1220 PUSH(msg);
1221 ctl_msg.cnt++;
1222 mutex_exit(&ctl_msg.nm_lock);
1223 }
1224
/*
 * Worker thread started by post_xscf_msg(): repeatedly takes the most
 * recent request off ctl_msg.head, discards any older ones, and pushes
 * it to the XSCF via scf_service_putinfo().  Exits (clearing
 * ctl_msg.nmt) once the queue is empty or the SCF symbol is missing.
 */
static void
pass2xscf_thread()
{
	nm_msg_t *msg;
	int ret;
	uint_t i, msg_sent, xscf_driver_delay;
	static uint_t repeat_cnt;
	uint_t *scf_wait_cnt;

	mutex_enter(&ctl_msg.nm_lock);

	/*
	 * Find the address of the SCF put routine if it's not done yet.
	 */
	if (ctl_msg.scf_service_function == NULL) {
		if ((ctl_msg.scf_service_function =
		    (int (*)(uint32_t, uint8_t, uint32_t, uint32_t, void *))
		    modgetsymvalue("scf_service_putinfo", 0)) == NULL) {
			cmn_err(CE_NOTE, "pass2xscf_thread: "
			    "scf_service_putinfo not found\n");
			ctl_msg.nmt = NULL;
			mutex_exit(&ctl_msg.nm_lock);
			return;
		}
	}

	/*
	 * Calculate the number of attempts to connect XSCF based on the
	 * scf driver delay (which is
	 * SCF_DEVBUSY_DELAY*scf_online_wait_rcnt seconds) and the value
	 * of xscf_connect_delay (the total number of seconds to wait
	 * till xscf get ready.)
	 */
	if (repeat_cnt == 0) {
		if ((scf_wait_cnt =
		    (uint_t *)
		    modgetsymvalue("scf_online_wait_rcnt", 0)) == NULL) {
			repeat_cnt = REPEATS;
		} else {

			/*
			 * NOTE(review): if *scf_wait_cnt were ever 0 this
			 * division would trap; presumably the scf driver
			 * guarantees a nonzero value -- confirm.
			 */
			xscf_driver_delay = *scf_wait_cnt *
			    SCF_DEVBUSY_DELAY;
			repeat_cnt = (xscf_connect_delay/xscf_driver_delay) + 1;
		}
	}

	/* nm_lock is held at the top of each iteration. */
	while (ctl_msg.cnt != 0) {

		/*
		 * Take the very last request from the queue,
		 */
		ctl_msg.now_serving = ctl_msg.head;
		ASSERT(ctl_msg.now_serving != NULL);

		/*
		 * and discard all the others if any.
		 */
		FREE_THE_TAIL(ctl_msg.now_serving);
		ctl_msg.cnt = 1;
		mutex_exit(&ctl_msg.nm_lock);

		/*
		 * Pass the name to XSCF. Note please, we do not hold the
		 * mutex while we are doing this.
		 */
		msg_sent = 0;
		for (i = 0; i < repeat_cnt; i++) {
			if (PASS2XSCF(ctl_msg.now_serving, ret)) {
				msg_sent = 1;
				break;
			} else {
				/* EBUSY is expected while XSCF warms up */
				if (ret != EBUSY) {
					cmn_err(CE_NOTE, "pass2xscf_thread:"
					    " unexpected return code"
					    " from scf_service_putinfo():"
					    " %d\n", ret);
				}
			}
		}

		if (msg_sent) {

			/*
			 * Remove the request from the list
			 */
			mutex_enter(&ctl_msg.nm_lock);
			msg = ctl_msg.now_serving;
			ctl_msg.now_serving = NULL;
			REMOVE(msg);
			ctl_msg.cnt--;
			mutex_exit(&ctl_msg.nm_lock);
			FREE_MSG(msg);
		} else {

			/*
			 * If while we have tried to communicate with
			 * XSCF there were any other requests we are
			 * going to drop this one and take the latest
			 * one.  Otherwise we will try to pass this one
			 * again.
			 */
			cmn_err(CE_NOTE,
			    "pass2xscf_thread: "
			    "scf_service_putinfo "
			    "not responding\n");
		}
		mutex_enter(&ctl_msg.nm_lock);
	}

	/*
	 * The request queue is empty, exit.
	 */
	ctl_msg.nmt = NULL;
	mutex_exit(&ctl_msg.nm_lock);
}
1340