/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright 2023 Oxide Computer Company
 */

#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/promif.h>
#include <sys/platform_module.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/machsystm.h>
#include <sys/bootconf.h>
#include <sys/nvpair.h>
#include <sys/kobj.h>
#include <sys/mem_cage.h>
#include <sys/opl.h>
#include <sys/scfd/scfostoescf.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/utsname.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/lgrp.h>
#include <sys/memnode.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
#include <sys/cpu.h>
#include <sys/dumphdr.h>
#include <vm/vm_dep.h>

int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
int (*opl_get_mem_addr)(char *unum, char *sid,
    uint64_t offset, uint64_t *paddr);

/* Memory for fcode claims.  16k times # maximum possible IO units */
#define EFCODE_SIZE     (OPL_MAX_BOARDS * OPL_MAX_IO_UNITS_PER_BOARD * 0x4000)
int efcode_size = EFCODE_SIZE;

#define OPL_MC_MEMBOARD_SHIFT 38        /* Boards on 256GB boundary */

/* Set the maximum number of boards for DR */
int opl_boards = OPL_MAX_BOARDS;

void sgn_update_all_cpus(ushort_t, uchar_t, uchar_t);

extern int tsb_lgrp_affinity;

int opl_tsb_spares = (OPL_MAX_BOARDS) * (OPL_MAX_PCICH_UNITS_PER_BOARD) *
    (OPL_MAX_TSBS_PER_PCICH);

pgcnt_t opl_startup_cage_size = 0;

/*
 * The length of the delay in seconds in communication with XSCF after
 * which the warning message will be logged.
 */
uint_t xscf_connect_delay = 60 * 15;

static opl_model_info_t opl_models[] = {
        { "FF1", OPL_MAX_BOARDS_FF1, FF1, STD_DISPATCH_TABLE },
        { "FF2", OPL_MAX_BOARDS_FF2, FF2, STD_DISPATCH_TABLE },
        { "DC1", OPL_MAX_BOARDS_DC1, DC1, STD_DISPATCH_TABLE },
        { "DC2", OPL_MAX_BOARDS_DC2, DC2, EXT_DISPATCH_TABLE },
        { "DC3", OPL_MAX_BOARDS_DC3, DC3, EXT_DISPATCH_TABLE },
        { "IKKAKU", OPL_MAX_BOARDS_IKKAKU, IKKAKU, STD_DISPATCH_TABLE },
};
static int opl_num_models = sizeof (opl_models)/sizeof (opl_model_info_t);

/*
 * opl_cur_model
 */
static opl_model_info_t *opl_cur_model = NULL;

static struct memlist *opl_memlist_per_board(struct memlist *ml);
static void post_xscf_msg(char *, int);
static void pass2xscf_thread();

/*
 * Note: the FF/DC out-of-order instruction engine takes only a
 * single cycle to execute each spin loop; for comparison, Panther
 * takes 6 cycles for the same loop.
 * OPL_BOFF_SPIN = base spin loop, roughly one memory reference time
 * OPL_BOFF_TM = approx nsec for OPL sleep instruction (1600 for OPL-C)
 * OPL_BOFF_SLEEP = approx number of SPIN iterations to equal one sleep
 * OPL_BOFF_MAX_SCALE - scaling factor for max backoff based on active cpus
 * Listed values are tuned for 2.15GHz to 2.64GHz systems.
 * Values may change for future systems.
 */
#define OPL_BOFF_SPIN 7
#define OPL_BOFF_SLEEP 4
#define OPL_BOFF_TM 1600
#define OPL_BOFF_MAX_SCALE 8

#define OPL_CLOCK_TICK_THRESHOLD        128
#define OPL_CLOCK_TICK_NCPUS            64

extern int      clock_tick_threshold;
extern int      clock_tick_ncpus;

int
set_platform_max_ncpus(void)
{
        return (OPL_MAX_CPU_PER_BOARD * OPL_MAX_BOARDS);
}

int
set_platform_tsb_spares(void)
{
        return (MIN(opl_tsb_spares, MAX_UPA));
}

static void
set_model_info()
{
        extern int ts_dispatch_extended;
        char    name[MAXSYSNAME];
        int     i;

        /*
         * Get model name from the root node.
         *
         * We are using the prom device tree since, at this point,
         * the Solaris device tree is not yet setup.
         */
        (void) prom_getprop(prom_rootnode(), "model", (caddr_t)name);

        for (i = 0; i < opl_num_models; i++) {
                if (strncmp(name, opl_models[i].model_name, MAXSYSNAME) == 0) {
                        opl_cur_model = &opl_models[i];
                        break;
                }
        }

        /*
         * If no model matched, it's an unknown model.
         * Just return.  It will default to standard dispatch tables.
         */
        if (i == opl_num_models)
                return;

        if ((opl_cur_model->model_cmds & EXT_DISPATCH_TABLE) &&
            (ts_dispatch_extended == -1)) {
                /*
                 * Based on the platform model, select a dispatch table.
                 * Only DC2 and DC3 systems use the alternate/extended
                 * TS dispatch table.
                 * IKKAKU, FF1, FF2 and DC1 systems use standard dispatch
                 * tables.
                 */
                ts_dispatch_extended = 1;
        }
}

static void
set_max_mmu_ctxdoms()
{
        extern uint_t   max_mmu_ctxdoms;
        int             max_boards;

        /*
         * From the model, get the maximum number of boards
         * supported and set the value accordingly. If the model
         * could not be determined or recognized, we assume the max value.
         */
        if (opl_cur_model == NULL)
                max_boards = OPL_MAX_BOARDS;
        else
                max_boards = opl_cur_model->model_max_boards;

        /*
         * On OPL, cores and MMUs are one-to-one.
         */
        max_mmu_ctxdoms = OPL_MAX_CORE_UNITS_PER_BOARD * max_boards;
}

#pragma weak mmu_init_large_pages

void
set_platform_defaults(void)
{
        extern char *tod_module_name;
        extern void cpu_sgn_update(ushort_t, uchar_t, uchar_t, int);
        extern void mmu_init_large_pages(size_t);

        /* Set the CPU signature function pointer */
        cpu_sgn_func = cpu_sgn_update;

        /* Set appropriate tod module for OPL platform */
        ASSERT(tod_module_name == NULL);
        tod_module_name = "todopl";

        if ((mmu_page_sizes == max_mmu_page_sizes) &&
            (mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
                if (&mmu_init_large_pages)
                        mmu_init_large_pages(mmu_ism_pagesize);
        }

        tsb_lgrp_affinity = 1;

        set_max_mmu_ctxdoms();

        /* set OPL threshold for compressed dumps */
        dump_plat_mincpu_default = DUMP_PLAT_SUN4U_OPL_MINCPU;
}

/*
 * Convert a logical board number to a physical one.
 */

#define LSBPROP         "board#"
#define PSBPROP         "physical-board#"

int
opl_get_physical_board(int id)
{
        dev_info_t      *root_dip, *dip = NULL;
        char            *dname = NULL;

        pnode_t         pnode;
        char            pname[MAXSYSNAME] = {0};

        int             lsb_id; /* Logical System Board ID */
        int             psb_id; /* Physical System Board ID */

        /*
         * This function is called at an early stage of bootup when the
         * kernel device tree is not initialized yet, and also
         * later on when the device tree is up. We want to try
         * the fast track first.
         */
        root_dip = ddi_root_node();
        if (root_dip) {
                /* Get from devinfo node */
                ndi_devi_enter(root_dip);
                for (dip = ddi_get_child(root_dip); dip;
                    dip = ddi_get_next_sibling(dip)) {

                        dname = ddi_node_name(dip);
                        if (strncmp(dname, "pseudo-mc", 9) != 0)
                                continue;

                        if ((lsb_id = (int)ddi_getprop(DDI_DEV_T_ANY, dip,
                            DDI_PROP_DONTPASS, LSBPROP, -1)) == -1)
                                continue;

                        if (id == lsb_id) {
                                if ((psb_id = (int)ddi_getprop(DDI_DEV_T_ANY,
                                    dip, DDI_PROP_DONTPASS, PSBPROP, -1))
                                    == -1) {
                                        ndi_devi_exit(root_dip);
                                        return (-1);
                                } else {
                                        ndi_devi_exit(root_dip);
                                        return (psb_id);
                                }
                        }
                }
                ndi_devi_exit(root_dip);
        }

        /*
         * We do not have the kernel device tree, or we did not
         * find the node for some reason (let's say the kernel
         * device tree was modified), let's try the OBP tree.
         */
        pnode = prom_rootnode();
        for (pnode = prom_childnode(pnode); pnode;
            pnode = prom_nextnode(pnode)) {

                if ((prom_getprop(pnode, "name", (caddr_t)pname) == -1) ||
                    (strncmp(pname, "pseudo-mc", 9) != 0))
                        continue;

                if (prom_getprop(pnode, LSBPROP, (caddr_t)&lsb_id) == -1)
                        continue;

                if (id == lsb_id) {
                        if (prom_getprop(pnode, PSBPROP,
                            (caddr_t)&psb_id) == -1) {
                                return (-1);
                        } else {
                                return (psb_id);
                        }
                }
        }

        return (-1);
}

/*
 * For OPL it's possible that memory from two or more successive boards
 * will be contiguous across the boards, and therefore represented as a
 * single chunk.
 * This function splits such chunks down the board boundaries.
 */
static struct memlist *
opl_memlist_per_board(struct memlist *ml)
{
        uint64_t ssize, low, high, boundary;
        struct memlist *head, *tail, *new;

        ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);

        head = tail = NULL;

        for (; ml; ml = ml->ml_next) {
                low = (uint64_t)ml->ml_address;
                high = low + (uint64_t)(ml->ml_size);
                while (low < high) {
                        boundary = roundup(low + 1, ssize);
                        boundary = MIN(high, boundary);
                        new = kmem_zalloc(sizeof (struct memlist), KM_SLEEP);
                        new->ml_address = low;
                        new->ml_size = boundary - low;
                        if (head == NULL)
                                head = new;
                        if (tail) {
                                tail->ml_next = new;
                                new->ml_prev = tail;
                        }
                        tail = new;
                        low = boundary;
                }
        }
        return (head);
}

void
set_platform_cage_params(void)
{
        extern pgcnt_t total_pages;
        extern struct memlist *phys_avail;
        struct memlist *ml, *tml;

        if (kernel_cage_enable) {
                pgcnt_t preferred_cage_size;

                preferred_cage_size = MAX(opl_startup_cage_size,
                    total_pages / 256);

                ml = opl_memlist_per_board(phys_avail);

                /*
                 * Note: we are assuming that POST has loaded the whole
                 * show into the high end of memory.  Having taken this
                 * leap, we copy the whole of phys_avail to the glist and
                 * arrange for the cage to grow downward (descending pfns).
                 */
                kcage_range_init(ml, KCAGE_DOWN, preferred_cage_size);

                /* free the memlist */
                do {
                        tml = ml->ml_next;
                        kmem_free(ml, sizeof (struct memlist));
                        ml = tml;
                } while (ml != NULL);
        }

        if (kcage_on)
                cmn_err(CE_NOTE, "!DR Kernel Cage is ENABLED");
        else
                cmn_err(CE_NOTE, "!DR Kernel Cage is DISABLED");
}

/*ARGSUSED*/
int
plat_cpu_poweron(struct cpu *cp)
{
        int (*opl_cpu_poweron)(struct cpu *) = NULL;

        opl_cpu_poweron =
            (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweron", 0);

        if (opl_cpu_poweron == NULL)
                return (ENOTSUP);
        else
                return ((opl_cpu_poweron)(cp));
}

/*ARGSUSED*/
int
plat_cpu_poweroff(struct cpu *cp)
{
        int (*opl_cpu_poweroff)(struct cpu *) = NULL;

        opl_cpu_poweroff =
            (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweroff", 0);

        if (opl_cpu_poweroff == NULL)
                return (ENOTSUP);
        else
                return ((opl_cpu_poweroff)(cp));
}

int
plat_max_boards(void)
{
        /*
         * The IKKAKU model supports only one system board; for all other
         * models, or if the model cannot be determined, return the maximum.
         */
        if ((opl_cur_model != NULL) && (opl_cur_model->model_type == IKKAKU))
                return (OPL_MAX_BOARDS_IKKAKU);
        else
                return (OPL_MAX_BOARDS);
}

int
plat_max_cpu_units_per_board(void)
{
        return (OPL_MAX_CPU_PER_BOARD);
}

int
plat_max_mem_units_per_board(void)
{
        return (OPL_MAX_MEM_UNITS_PER_BOARD);
}

int
plat_max_io_units_per_board(void)
{
        return (OPL_MAX_IO_UNITS_PER_BOARD);
}

int
plat_max_cmp_units_per_board(void)
{
        return (OPL_MAX_CMP_UNITS_PER_BOARD);
}

int
plat_max_core_units_per_board(void)
{
        return (OPL_MAX_CORE_UNITS_PER_BOARD);
}

int
plat_pfn_to_mem_node(pfn_t pfn)
{
        return (pfn >> mem_node_pfn_shift);
}

/* ARGSUSED */
void
plat_build_mem_nodes(prom_memlist_t *list, size_t nelems)
{
        size_t  elem;
        pfn_t   basepfn;
        pgcnt_t npgs;
        uint64_t        boundary, ssize;
        uint64_t        low, high;

        /*
         * OPL mem slices are always aligned on a 256GB boundary.
         */
        mem_node_pfn_shift = OPL_MC_MEMBOARD_SHIFT - MMU_PAGESHIFT;
        mem_node_physalign = 0;

        /*
         * Boot install lists are arranged <addr, len>, <addr, len>, ...
         */
        ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);
        for (elem = 0; elem < nelems; list++, elem++) {
                low = list->addr;
                high = low + list->size;
                while (low < high) {
                        boundary = roundup(low + 1, ssize);
                        boundary = MIN(high, boundary);
                        basepfn = btop(low);
                        npgs = btop(boundary - low);
                        mem_node_add_slice(basepfn, basepfn + npgs - 1);
                        low = boundary;
                }
        }
}

/*
 * Find the CPU associated with a slice at boot-time.
 */
void
plat_fill_mc(pnode_t nodeid)
{
        int board;
        int memnode;
        struct {
                uint64_t        addr;
                uint64_t        size;
        } mem_range;

        if (prom_getprop(nodeid, "board#", (caddr_t)&board) < 0) {
                panic("Can not find board# property in mc node %x", nodeid);
        }
        if (prom_getprop(nodeid, "sb-mem-ranges", (caddr_t)&mem_range) < 0) {
                panic("Can not find sb-mem-ranges property in mc node %x",
                    nodeid);
        }
        memnode = mem_range.addr >> OPL_MC_MEMBOARD_SHIFT;
        plat_assign_lgrphand_to_mem_node(board, memnode);
}

/*
 * Return the platform handle for the lgroup containing the given CPU
 *
 * For OPL, lgroup platform handle == board #.
 */

extern int mpo_disabled;
extern lgrp_handle_t lgrp_default_handle;

lgrp_handle_t
plat_lgrp_cpu_to_hand(processorid_t id)
{
        lgrp_handle_t plathand;

        /*
         * Return the real platform handle for the CPU until
         * such time as we know that MPO should be disabled.
         * At that point, we set the "mpo_disabled" flag to true,
         * and from that point on, return the default handle.
         *
         * By the time we know that MPO should be disabled, the
         * first CPU will have already been added to a leaf
         * lgroup, but that's ok. The common lgroup code will
         * double check that the boot CPU is in the correct place,
         * and in the case where mpo should be disabled, will move
         * it to the root if necessary.
         */
        if (mpo_disabled) {
                /* If MPO is disabled, return the default (UMA) handle */
                plathand = lgrp_default_handle;
        } else
                plathand = (lgrp_handle_t)LSB_ID(id);
        return (plathand);
}

/*
 * Platform specific lgroup initialization
 */
void
plat_lgrp_init(void)
{
        extern uint32_t lgrp_expand_proc_thresh;
        extern uint32_t lgrp_expand_proc_diff;
        const uint_t m = LGRP_LOADAVG_THREAD_MAX;

        /*
         * Set tuneables for the OPL architecture
         *
         * lgrp_expand_proc_thresh is the threshold load on the set of
         * lgroups a process is currently using before considering
         * adding another lgroup to the set.  For Oly-C and Jupiter
         * systems, there are four sockets per lgroup.  Setting
         * lgrp_expand_proc_thresh to add lgroups when the load reaches
         * four threads will spread the load when it exceeds one thread
         * per socket, optimizing memory bandwidth and L2 cache space.
         *
         * lgrp_expand_proc_diff determines how much less another lgroup
         * must be loaded before shifting the start location of a thread
         * to it.
         *
         * lgrp_loadavg_tolerance is the threshold where two lgroups are
         * considered to have different loads.  It is set to be less than
         * 1% so that even a small residual load will be considered different
         * from no residual load.
         *
         * We note loadavg values are not precise.
         * Every 1/10 of a second loadavg values are reduced by 5%.
         * This adjustment can come in the middle of the lgroup selection
         * process, and for larger parallel apps with many threads can
         * frequently occur between the start of the second thread
         * placement and the finish of the last thread placement.
         * We also must be careful to not use too small of a threshold
         * since the cumulative decay for 1 second idle time is 40%.
         * That is, the residual load from completed threads will still
         * be 60% one second after the proc goes idle or 8% after 5 seconds.
         *
         * To allow for lag time in loadavg calculations
         * remote thresh = 3.75 * LGRP_LOADAVG_THREAD_MAX
         * local thresh  = 0.75 * LGRP_LOADAVG_THREAD_MAX
         * tolerance     = 0.0078 * LGRP_LOADAVG_THREAD_MAX
         *
         * The load placement algorithms consider LGRP_LOADAVG_THREAD_MAX
         * as the equivalent of a load of 1. To make the code more compact,
         * we set m = LGRP_LOADAVG_THREAD_MAX.
         */
        lgrp_expand_proc_thresh = (m * 3) + (m >> 1) + (m >> 2);
        lgrp_expand_proc_diff = (m >> 1) + (m >> 2);
        lgrp_loadavg_tolerance = (m >> 7);
}

/*
 * Platform notification of lgroup (re)configuration changes
 */
/*ARGSUSED*/
void
plat_lgrp_config(lgrp_config_flag_t evt, uintptr_t arg)
{
        update_membounds_t *umb;
        lgrp_config_mem_rename_t lmr;
        int sbd, tbd;
        lgrp_handle_t hand, shand, thand;
        int mnode, snode, tnode;
        pfn_t start, end;

        if (mpo_disabled)
                return;

        switch (evt) {

        case LGRP_CONFIG_MEM_ADD:
                /*
                 * Establish the lgroup handle to memnode translation.
                 */
                umb = (update_membounds_t *)arg;

                hand = umb->u_board;
                mnode = plat_pfn_to_mem_node(umb->u_base >> MMU_PAGESHIFT);
                plat_assign_lgrphand_to_mem_node(hand, mnode);

                break;

        case LGRP_CONFIG_MEM_DEL:
                /*
                 * Special handling for possible memory holes.
                 */
                umb = (update_membounds_t *)arg;
                hand = umb->u_board;
                if ((mnode = plat_lgrphand_to_mem_node(hand)) != -1) {
                        if (mem_node_config[mnode].exists) {
                                start = mem_node_config[mnode].physbase;
                                end = mem_node_config[mnode].physmax;
                                mem_node_del_slice(start, end);
                        }
                }

                break;

        case LGRP_CONFIG_MEM_RENAME:
                /*
                 * During a DR copy-rename operation, all of the memory
                 * on one board is moved to another board -- but the
                 * addresses/pfns and memnodes don't change. This means
                 * the memory has changed locations without changing identity.
                 *
                 * Source is where we are copying from and target is where we
                 * are copying to.  After source memnode is copied to target
                 * memnode, the physical addresses of the target memnode are
                 * renamed to match what the source memnode had.  Then target
                 * memnode can be removed and source memnode can take its
                 * place.
                 *
                 * To do this, swap the lgroup handle to memnode mappings for
                 * the boards, so target lgroup will have source memnode and
                 * source lgroup will have empty target memnode which is where
                 * its memory will go (if any is added to it later).
                 *
                 * Then source memnode needs to be removed from its lgroup
                 * and added to the target lgroup where the memory was living
                 * but under a different name/memnode.  The memory was in the
                 * target memnode and now lives in the source memnode with
                 * different physical addresses even though it is the same
                 * memory.
                 */
                sbd = arg & 0xffff;
                tbd = (arg & 0xffff0000) >> 16;
                shand = sbd;
                thand = tbd;
                snode = plat_lgrphand_to_mem_node(shand);
                tnode = plat_lgrphand_to_mem_node(thand);

                /*
                 * Special handling for possible memory holes.
                 */
                if (tnode != -1 && mem_node_config[tnode].exists) {
                        start = mem_node_config[tnode].physbase;
                        end = mem_node_config[tnode].physmax;
                        mem_node_del_slice(start, end);
                }

                plat_assign_lgrphand_to_mem_node(thand, snode);
                plat_assign_lgrphand_to_mem_node(shand, tnode);

                lmr.lmem_rename_from = shand;
                lmr.lmem_rename_to = thand;

                /*
                 * Remove source memnode of copy rename from its lgroup
                 * and add it to its new target lgroup
                 */
                lgrp_config(LGRP_CONFIG_MEM_RENAME, (uintptr_t)snode,
                    (uintptr_t)&lmr);

                break;

        default:
                break;
        }
}

/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system, cannot be used across platforms,
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so platform gets to decide its value.  It would be nice if the
 * number was at least proportional to make comparisons more meaningful though.
 * NOTE: The numbers below are supposed to be load latencies for uncached
 * memory divided by 10.
 *
 */
int
plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
{
        /*
         * Return min remote latency when there are more than two lgroups
         * (root and child) and getting latency between two different lgroups
         * or root is involved
         */
        if (lgrp_optimizations() && (from != to ||
            from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE))
                return (42);
        else
                return (35);
}

/*
 * Return platform handle for root lgroup
 */
lgrp_handle_t
plat_lgrp_root_hand(void)
{
        if (mpo_disabled)
                return (lgrp_default_handle);

        return (LGRP_DEFAULT_HANDLE);
}

/*ARGSUSED*/
void
plat_freelist_process(int mnode)
{
}

void
load_platform_drivers(void)
{
        (void) i_ddi_attach_pseudo_node("dr");
}

/*
 * No platform drivers on this platform
 */
char *platform_module_list[] = {
        (char *)0
};

/*ARGSUSED*/
void
plat_tod_fault(enum tod_fault_type tod_bad)
{
}

/*ARGSUSED*/
void
cpu_sgn_update(ushort_t sgn, uchar_t state, uchar_t sub_state, int cpuid)
{
        static void (*scf_panic_callback)(int);
        static void (*scf_shutdown_callback)(int);

        /*
         * This is for notifying SCF of a system panic or shutdown.
         * In the case of shutdown or panic, the SCF callback
         * function should be called.
         * <SCF callback functions>
         *   scf_panic_callb()   : panicsys()->panic_quiesce_hw()
         *   scf_shutdown_callb(): halt() or power_down() or reboot_machine()
         * cpuid should be -1 and state should be SIGST_EXIT.
         */
        if (state == SIGST_EXIT && cpuid == -1) {

                /*
                 * find the symbol for the SCF panic callback routine in driver
                 */
                if (scf_panic_callback == NULL)
                        scf_panic_callback = (void (*)(int))
                            modgetsymvalue("scf_panic_callb", 0);
                if (scf_shutdown_callback == NULL)
                        scf_shutdown_callback = (void (*)(int))
                            modgetsymvalue("scf_shutdown_callb", 0);

                switch (sub_state) {
                case SIGSUBST_PANIC:
                        if (scf_panic_callback == NULL) {
                                cmn_err(CE_NOTE, "!cpu_sgn_update: "
                                    "scf_panic_callb not found\n");
                                return;
                        }
                        scf_panic_callback(SIGSUBST_PANIC);
                        break;

                case SIGSUBST_HALT:
                        if (scf_shutdown_callback == NULL) {
                                cmn_err(CE_NOTE, "!cpu_sgn_update: "
                                    "scf_shutdown_callb not found\n");
                                return;
                        }
                        scf_shutdown_callback(SIGSUBST_HALT);
                        break;

                case SIGSUBST_ENVIRON:
                        if (scf_shutdown_callback == NULL) {
                                cmn_err(CE_NOTE, "!cpu_sgn_update: "
                                    "scf_shutdown_callb not found\n");
                                return;
                        }
                        scf_shutdown_callback(SIGSUBST_ENVIRON);
                        break;

                case SIGSUBST_REBOOT:
                        if (scf_shutdown_callback == NULL) {
                                cmn_err(CE_NOTE, "!cpu_sgn_update: "
                                    "scf_shutdown_callb not found\n");
                                return;
                        }
                        scf_shutdown_callback(SIGSUBST_REBOOT);
                        break;
                }
        }
}

/*ARGSUSED*/
int
plat_get_mem_unum(int synd_code, uint64_t flt_addr, int flt_bus_id,
    int flt_in_memory, ushort_t flt_status,
    char *buf, int buflen, int *lenp)
{
        /*
         * check if it's a Memory error.
         */
        if (flt_in_memory) {
                if (opl_get_mem_unum != NULL) {
                        return (opl_get_mem_unum(synd_code, flt_addr, buf,
                            buflen, lenp));
                } else {
                        return (ENOTSUP);
                }
        } else {
                return (ENOTSUP);
        }
}

/*ARGSUSED*/
int
plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
        int     ret = 0;
        int     sb;
        int     plen;

        sb = opl_get_physical_board(LSB_ID(cpuid));
        if (sb == -1) {
                return (ENXIO);
        }

        /*
         * opl_cur_model is assigned here
         */
        if (opl_cur_model == NULL) {
                set_model_info();

                /*
                 * if not matched, return
                 */
                if (opl_cur_model == NULL)
                        return (ENODEV);
        }

        ASSERT((opl_cur_model - opl_models) == (opl_cur_model->model_type));

        switch (opl_cur_model->model_type) {
        case FF1:
                plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_A",
                    CHIP_ID(cpuid) / 2);
                break;

        case FF2:
                plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_B",
                    (CHIP_ID(cpuid) / 2) + (sb * 2));
                break;

        case DC1:
        case DC2:
        case DC3:
                plen = snprintf(buf, buflen, "/%s%02d/CPUM%d", "CMU", sb,
                    CHIP_ID(cpuid));
                break;

        case IKKAKU:
                plen = snprintf(buf, buflen, "/%s", "MBU_A");
                break;

        default:
                /* This should never happen */
                return (ENODEV);
        }

        if (plen >= buflen) {
                ret = ENOSPC;
        } else {
                if (lenp)
                        *lenp = strlen(buf);
        }
        return (ret);
}

void
plat_nodename_set(void)
{
        post_xscf_msg((char *)&utsname, sizeof (struct utsname));
}

caddr_t efcode_vaddr = NULL;

/*
 * Preallocate enough memory for fcode claims.
 */

caddr_t
efcode_alloc(caddr_t alloc_base)
{
        caddr_t efcode_alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
            MMU_PAGESIZE);
        caddr_t vaddr;

        /*
         * allocate the physical memory for the Oberon fcode.
         */
        if ((vaddr = (caddr_t)BOP_ALLOC(bootops, efcode_alloc_base,
            efcode_size, MMU_PAGESIZE)) == NULL)
                cmn_err(CE_PANIC, "Cannot allocate Efcode Memory");

        efcode_vaddr = vaddr;

        return (efcode_alloc_base + efcode_size);
}

caddr_t
plat_startup_memlist(caddr_t alloc_base)
{
        caddr_t tmp_alloc_base;

        tmp_alloc_base = efcode_alloc(alloc_base);
        tmp_alloc_base =
            (caddr_t)roundup((uintptr_t)tmp_alloc_base, ecache_alignsize);
        return (tmp_alloc_base);
}

/* need to forward declare these */
static void plat_lock_delay(uint_t);

void
startup_platform(void)
{
        if (clock_tick_threshold == 0)
                clock_tick_threshold = OPL_CLOCK_TICK_THRESHOLD;
        if (clock_tick_ncpus == 0)
                clock_tick_ncpus = OPL_CLOCK_TICK_NCPUS;
        mutex_lock_delay = plat_lock_delay;
        mutex_cap_factor = OPL_BOFF_MAX_SCALE;
}

static uint_t
get_mmu_id(processorid_t cpuid)
{
        int pb = opl_get_physical_board(LSB_ID(cpuid));

        if (pb == -1) {
                cmn_err(CE_PANIC,
                    "opl_get_physical_board failed (cpu %d LSB %u)",
                    cpuid, LSB_ID(cpuid));
        }
        return (pb * OPL_MAX_COREID_PER_BOARD) + (CHIP_ID(cpuid) *
            OPL_MAX_COREID_PER_CMP) + CORE_ID(cpuid);
}

void
plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
{
        int     impl;

        impl = cpunodes[cpuid].implementation;
        if (IS_OLYMPUS_C(impl) || IS_JUPITER(impl)) {
                info->mmu_idx = get_mmu_id(cpuid);
                info->mmu_nctxs = 8192;
        } else {
                cmn_err(CE_PANIC, "Unknown processor %d", impl);
        }
}

int
plat_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
        if (opl_get_mem_sid == NULL) {
                return (ENOTSUP);
        }
        return (opl_get_mem_sid(unum, buf, buflen, lenp));
}

int
plat_get_mem_offset(uint64_t paddr, uint64_t *offp)
{
        if (opl_get_mem_offset == NULL) {
                return (ENOTSUP);
        }
        return (opl_get_mem_offset(paddr, offp));
}

int
plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
        if (opl_get_mem_addr == NULL) {
                return (ENOTSUP);
        }
        return (opl_get_mem_addr(unum, sid, offset, addrp));
}

void
plat_lock_delay(uint_t backoff)
{
        int i;
        uint_t cnt, remcnt;
        int ctr;
        hrtime_t delay_start, rem_delay;
        /*
         * Platform specific lock delay code for OPL
         *
         * Using staged linear increases in the delay.
         * The sleep instruction is the preferred method of delay,
         * but is too large of granularity for the initial backoff.
         */

        if (backoff < 100) {
                /*
                 * If desired backoff is long enough,
                 * use sleep for most of it
                 */
                for (cnt = backoff;
                    cnt >= OPL_BOFF_SLEEP;
                    cnt -= OPL_BOFF_SLEEP) {
                        cpu_smt_pause();
                }
                /*
                 * spin for small remainder of backoff
                 */
                for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
                        mutex_delay_default();
                }
        } else {
                /* backoff is large.  Fill it by sleeping */
                delay_start = gethrtime_waitfree();
                cnt = backoff / OPL_BOFF_SLEEP;
                /*
                 * use sleep instructions for delay
                 */
                for (i = 0; i < cnt; i++) {
                        cpu_smt_pause();
                }

                /*
                 * Note: if the other strand executes a sleep instruction,
                 * then the sleep ends immediately with a minimum time of
                 * 42 clocks.  We check gethrtime to ensure we have
                 * waited long enough.  And we include both a short
                 * spin loop and a sleep for repeated delay times.
                 */

                rem_delay = gethrtime_waitfree() - delay_start;
                while (rem_delay < cnt * OPL_BOFF_TM) {
                        remcnt = cnt - (rem_delay / OPL_BOFF_TM);
                        for (i = 0; i < remcnt; i++) {
                                cpu_smt_pause();
                                for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
                                        mutex_delay_default();
                                }
                        }
                        rem_delay = gethrtime_waitfree() - delay_start;
                }
        }
}

/*
 * The following code implements an asynchronous call to XSCF to set up the
 * domain node name.
 */

#define FREE_MSG(m)             kmem_free((m), NM_LEN((m)->len))

/*
 * The following three macros define all the operations on the request
 * list we are using here, and hide the details of the list
 * implementation from the code.
 */
#define PUSH(m) \
        { \
                (m)->next = ctl_msg.head; \
                (m)->prev = NULL; \
                if ((m)->next != NULL) \
                        (m)->next->prev = (m); \
                ctl_msg.head = (m); \
        }

#define REMOVE(m) \
        { \
                if ((m)->prev != NULL) \
                        (m)->prev->next = (m)->next; \
                else \
                        ctl_msg.head = (m)->next; \
                if ((m)->next != NULL) \
                        (m)->next->prev = (m)->prev; \
        }

#define FREE_THE_TAIL(head) \
        { \
                nm_msg_t *n_msg, *m; \
                m = (head)->next; \
                (head)->next = NULL; \
                while (m != NULL) { \
                        n_msg = m->next; \
                        FREE_MSG(m); \
                        m = n_msg; \
                } \
        }

#define SCF_PUTINFO(f, s, p) \
        f(KEY_ESCF, 0x01, 0, s, p)

#define PASS2XSCF(m, r) ((r = SCF_PUTINFO(ctl_msg.scf_service_function, \
            (m)->len, (m)->data)) == 0)

/*
 * The value of the following macro loosely depends on the
 * value of the "device busy" timeout used in the SCF driver.
 * (See pass2xscf_thread()).
 */
#define SCF_DEVBUSY_DELAY       10

/*
 * The default number of attempts to contact the scf driver
 * if we cannot fetch any information about the timeout value
 * it uses.
 */

#define REPEATS         4

typedef struct nm_msg {
        struct nm_msg *next;
        struct nm_msg *prev;
        int len;
        char data[1];
} nm_msg_t;

#define NM_LEN(len)             (sizeof (nm_msg_t) + (len) - 1)

static struct ctlmsg {
        nm_msg_t        *head;
        nm_msg_t        *now_serving;
        kmutex_t        nm_lock;
        kthread_t       *nmt;
        int             cnt;
        int (*scf_service_function)(uint32_t, uint8_t,
            uint32_t, uint32_t, void *);
} ctl_msg;

static void
post_xscf_msg(char *dp, int len)
{
        nm_msg_t *msg;

        msg = (nm_msg_t *)kmem_zalloc(NM_LEN(len), KM_SLEEP);

        bcopy(dp, msg->data, len);
        msg->len = len;

        mutex_enter(&ctl_msg.nm_lock);
        if (ctl_msg.nmt == NULL) {
                ctl_msg.nmt = thread_create(NULL, 0, pass2xscf_thread,
                    NULL, 0, &p0, TS_RUN, minclsyspri);
        }

        PUSH(msg);
        ctl_msg.cnt++;
        mutex_exit(&ctl_msg.nm_lock);
}

static void
pass2xscf_thread()
{
        nm_msg_t *msg;
        int ret;
        uint_t i, msg_sent, xscf_driver_delay;
        static uint_t repeat_cnt;
        uint_t *scf_wait_cnt;

        mutex_enter(&ctl_msg.nm_lock);

        /*
         * Find the address of the SCF put routine if it's not done yet.
         */
        if (ctl_msg.scf_service_function == NULL) {
                if ((ctl_msg.scf_service_function =
                    (int (*)(uint32_t, uint8_t, uint32_t, uint32_t, void *))
                    modgetsymvalue("scf_service_putinfo", 0)) == NULL) {
                        cmn_err(CE_NOTE, "pass2xscf_thread: "
                            "scf_service_putinfo not found\n");
                        ctl_msg.nmt = NULL;
                        mutex_exit(&ctl_msg.nm_lock);
                        return;
                }
        }

        /*
         * Calculate the number of attempts to connect to the XSCF based on
         * the scf driver delay (which is
         * SCF_DEVBUSY_DELAY*scf_online_wait_rcnt seconds) and the value
         * of xscf_connect_delay (the total number of seconds to wait
         * until the XSCF gets ready).
         */
        if (repeat_cnt == 0) {
                if ((scf_wait_cnt =
                    (uint_t *)
                    modgetsymvalue("scf_online_wait_rcnt", 0)) == NULL) {
                        repeat_cnt = REPEATS;
                } else {

                        xscf_driver_delay = *scf_wait_cnt *
                            SCF_DEVBUSY_DELAY;
                        repeat_cnt = (xscf_connect_delay/xscf_driver_delay) + 1;
                }
        }

        while (ctl_msg.cnt != 0) {

                /*
                 * Take the very last request from the queue,
                 */
                ctl_msg.now_serving = ctl_msg.head;
                ASSERT(ctl_msg.now_serving != NULL);

                /*
                 * and discard all the others if any.
                 */
                FREE_THE_TAIL(ctl_msg.now_serving);
                ctl_msg.cnt = 1;
                mutex_exit(&ctl_msg.nm_lock);

                /*
                 * Pass the name to XSCF. Note that we do not hold the
                 * mutex while we are doing this.
                 */
                msg_sent = 0;
                for (i = 0; i < repeat_cnt; i++) {
                        if (PASS2XSCF(ctl_msg.now_serving, ret)) {
                                msg_sent = 1;
                                break;
                        } else {
                                if (ret != EBUSY) {
                                        cmn_err(CE_NOTE, "pass2xscf_thread:"
                                            " unexpected return code"
                                            " from scf_service_putinfo():"
                                            " %d\n", ret);
                                }
                        }
                }

                if (msg_sent) {

                        /*
                         * Remove the request from the list
                         */
                        mutex_enter(&ctl_msg.nm_lock);
                        msg = ctl_msg.now_serving;
                        ctl_msg.now_serving = NULL;
                        REMOVE(msg);
                        ctl_msg.cnt--;
                        mutex_exit(&ctl_msg.nm_lock);
                        FREE_MSG(msg);
                } else {

                        /*
                         * If any other requests arrived while we were
                         * trying to communicate with XSCF, we will drop
                         * this one and take the latest one.  Otherwise
                         * we will try to pass this one again.
                         */
                        cmn_err(CE_NOTE,
                            "pass2xscf_thread: "
                            "scf_service_putinfo "
                            "not responding\n");
                }
                mutex_enter(&ctl_msg.nm_lock);
        }

        /*
         * The request queue is empty, exit.
         */
        ctl_msg.nmt = NULL;
        mutex_exit(&ctl_msg.nm_lock);
}