/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/promif.h>
#include <sys/platform_module.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/machsystm.h>
#include <sys/bootconf.h>
#include <sys/nvpair.h>
#include <sys/kobj.h>
#include <sys/mem_cage.h>
#include <sys/opl.h>
#include <sys/scfd/scfostoescf.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/utsname.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/lgrp.h>
#include <sys/memnode.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
#include <sys/cpu.h>
#include <sys/dumphdr.h>
#include <vm/vm_dep.h>

int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
int (*opl_get_mem_addr)(char *unum, char *sid,
    uint64_t offset, uint64_t *paddr);

/* Memory for fcode claims. 16k times # maximum possible IO units */
#define	EFCODE_SIZE	(OPL_MAX_BOARDS * OPL_MAX_IO_UNITS_PER_BOARD * 0x4000)
int efcode_size = EFCODE_SIZE;

#define	OPL_MC_MEMBOARD_SHIFT 38	/* Boards on 256GB boundary */

/* Set the maximum number of boards for DR */
int opl_boards = OPL_MAX_BOARDS;

void sgn_update_all_cpus(ushort_t, uchar_t, uchar_t);

extern int tsb_lgrp_affinity;

int opl_tsb_spares = (OPL_MAX_BOARDS) * (OPL_MAX_PCICH_UNITS_PER_BOARD) *
	(OPL_MAX_TSBS_PER_PCICH);

pgcnt_t opl_startup_cage_size = 0;

/*
 * The length of the delay in seconds in communication with XSCF after
 * which the warning message will be logged.
 */
uint_t	xscf_connect_delay = 60 * 15;

static	opl_model_info_t opl_models[] = {
	{ "FF1", OPL_MAX_BOARDS_FF1, FF1, STD_DISPATCH_TABLE },
	{ "FF2", OPL_MAX_BOARDS_FF2, FF2, STD_DISPATCH_TABLE },
	{ "DC1", OPL_MAX_BOARDS_DC1, DC1, STD_DISPATCH_TABLE },
	{ "DC2", OPL_MAX_BOARDS_DC2, DC2, EXT_DISPATCH_TABLE },
	{ "DC3", OPL_MAX_BOARDS_DC3, DC3, EXT_DISPATCH_TABLE },
	{ "IKKAKU", OPL_MAX_BOARDS_IKKAKU, IKKAKU, STD_DISPATCH_TABLE },
};
static	int	opl_num_models = sizeof (opl_models)/sizeof (opl_model_info_t);

/*
 * opl_cur_model
 */
static	opl_model_info_t *opl_cur_model = NULL;

static struct memlist *opl_memlist_per_board(struct memlist *ml);
static void post_xscf_msg(char *, int);
static void pass2xscf_thread();

/*
 * Note that the FF/DC out-of-order instruction engine takes only a
 * single cycle to execute each spin loop; for comparison, Panther
 * takes 6 cycles for the same loop.
 * OPL_BOFF_SPIN = base spin loop, roughly one memory reference time
 * OPL_BOFF_TM = approx nsec for OPL sleep instruction (1600 for OPL-C)
 * OPL_BOFF_SLEEP = approx number of SPIN iterations to equal one sleep
 * OPL_BOFF_MAX_SCALE - scaling factor for max backoff based on active cpus
 * Listed values are tuned for 2.15GHz to 2.64GHz systems and
 * may change for future systems.
 */
#define	OPL_BOFF_SPIN 7
#define	OPL_BOFF_SLEEP 4
#define	OPL_BOFF_TM 1600
#define	OPL_BOFF_MAX_SCALE 8
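
/*
 * For illustration (see plat_lock_delay() below): with the values above,
 * plat_lock_delay(16) issues 16 / OPL_BOFF_SLEEP = 4 sleep instructions and
 * no residual spin, while plat_lock_delay(10) issues 2 sleeps followed by
 * 2 * OPL_BOFF_SPIN = 14 spin iterations.
 */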

#define	OPL_CLOCK_TICK_THRESHOLD	128
#define	OPL_CLOCK_TICK_NCPUS		64

extern int	clock_tick_threshold;
extern int	clock_tick_ncpus;

int
set_platform_max_ncpus(void)
{
	return (OPL_MAX_CPU_PER_BOARD * OPL_MAX_BOARDS);
}

int
set_platform_tsb_spares(void)
{
	return (MIN(opl_tsb_spares, MAX_UPA));
}

static void
set_model_info()
{
	extern int ts_dispatch_extended;
	char	name[MAXSYSNAME];
	int	i;

	/*
	 * Get model name from the root node.
	 *
	 * We are using the prom device tree since, at this point,
	 * the Solaris device tree is not yet setup.
	 */
	(void) prom_getprop(prom_rootnode(), "model", (caddr_t)name);

	for (i = 0; i < opl_num_models; i++) {
		if (strncmp(name, opl_models[i].model_name, MAXSYSNAME) == 0) {
			opl_cur_model = &opl_models[i];
			break;
		}
	}

	/*
	 * If the model was not matched, it is an unknown model.
	 * Just return; it will default to the standard dispatch tables.
	 */
	if (i == opl_num_models)
		return;

	if ((opl_cur_model->model_cmds & EXT_DISPATCH_TABLE) &&
	    (ts_dispatch_extended == -1)) {
		/*
		 * Based on the platform model, select a dispatch table.
		 * Only DC2 and DC3 systems use the alternate/extended
		 * TS dispatch table.
		 * IKKAKU, FF1, FF2 and DC1 systems use the standard
		 * dispatch tables.
		 */
		ts_dispatch_extended = 1;
	}

}

static void
set_max_mmu_ctxdoms()
{
	extern uint_t	max_mmu_ctxdoms;
	int		max_boards;

	/*
	 * From the model, get the maximum number of boards
	 * supported and set the value accordingly. If the model
	 * could not be determined or recognized, we assume the max value.
	 */
	if (opl_cur_model == NULL)
		max_boards = OPL_MAX_BOARDS;
	else
		max_boards = opl_cur_model->model_max_boards;

	/*
	 * On OPL, cores and MMUs are one-to-one.
	 */
	max_mmu_ctxdoms = OPL_MAX_CORE_UNITS_PER_BOARD * max_boards;
}

#pragma weak mmu_init_large_pages

void
set_platform_defaults(void)
{
	extern char *tod_module_name;
	extern void cpu_sgn_update(ushort_t, uchar_t, uchar_t, int);
	extern void mmu_init_large_pages(size_t);

	/* Set the CPU signature function pointer */
	cpu_sgn_func = cpu_sgn_update;

	/* Set appropriate tod module for OPL platform */
	ASSERT(tod_module_name == NULL);
	tod_module_name = "todopl";

	if ((mmu_page_sizes == max_mmu_page_sizes) &&
	    (mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
		if (&mmu_init_large_pages)
			mmu_init_large_pages(mmu_ism_pagesize);
	}

	tsb_lgrp_affinity = 1;

	set_max_mmu_ctxdoms();

	/* set OPL threshold for compressed dumps */
	dump_plat_mincpu_default = DUMP_PLAT_SUN4U_OPL_MINCPU;
}

/*
 * Convert a logical board number to a physical one.
 */

#define	LSBPROP		"board#"
#define	PSBPROP		"physical-board#"

int
opl_get_physical_board(int id)
{
	dev_info_t	*root_dip, *dip = NULL;
	char		*dname = NULL;
	int		circ;

	pnode_t		pnode;
	char		pname[MAXSYSNAME] = {0};

	int		lsb_id;	/* Logical System Board ID */
	int		psb_id;	/* Physical System Board ID */


	/*
	 * This function is called at an early stage of boot when the
	 * kernel device tree is not initialized yet, and also
	 * later on when the device tree is up. We want to try
	 * the fast track first.
	 */
	root_dip = ddi_root_node();
	if (root_dip) {
		/* Get from devinfo node */
		ndi_devi_enter(root_dip, &circ);
		for (dip = ddi_get_child(root_dip); dip;
		    dip = ddi_get_next_sibling(dip)) {

			dname = ddi_node_name(dip);
			if (strncmp(dname, "pseudo-mc", 9) != 0)
				continue;

			if ((lsb_id = (int)ddi_getprop(DDI_DEV_T_ANY, dip,
			    DDI_PROP_DONTPASS, LSBPROP, -1)) == -1)
				continue;

			if (id == lsb_id) {
				if ((psb_id = (int)ddi_getprop(DDI_DEV_T_ANY,
				    dip, DDI_PROP_DONTPASS, PSBPROP, -1))
				    == -1) {
					ndi_devi_exit(root_dip, circ);
					return (-1);
				} else {
					ndi_devi_exit(root_dip, circ);
					return (psb_id);
				}
			}
		}
		ndi_devi_exit(root_dip, circ);
	}

	/*
	 * If we do not have the kernel device tree, or we did not
	 * find the node for some reason (say the kernel device tree
	 * was modified), try the OBP tree.
	 */
	pnode = prom_rootnode();
	for (pnode = prom_childnode(pnode); pnode;
	    pnode = prom_nextnode(pnode)) {

		if ((prom_getprop(pnode, "name", (caddr_t)pname) == -1) ||
		    (strncmp(pname, "pseudo-mc", 9) != 0))
			continue;

		if (prom_getprop(pnode, LSBPROP, (caddr_t)&lsb_id) == -1)
			continue;

		if (id == lsb_id) {
			if (prom_getprop(pnode, PSBPROP,
			    (caddr_t)&psb_id) == -1) {
				return (-1);
			} else {
				return (psb_id);
			}
		}
	}

	return (-1);
}

/*
 * For OPL it's possible that memory from two or more successive boards
 * will be contiguous across the boards, and therefore represented as a
 * single chunk.
 * This function splits such chunks down the board boundaries.
 */
static struct memlist *
opl_memlist_per_board(struct memlist *ml)
{
	uint64_t ssize, low, high, boundary;
	struct memlist *head, *tail, *new;

	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);

	head = tail = NULL;

	for (; ml; ml = ml->ml_next) {
		low = (uint64_t)ml->ml_address;
		high = low+(uint64_t)(ml->ml_size);
		while (low < high) {
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			new = kmem_zalloc(sizeof (struct memlist), KM_SLEEP);
			new->ml_address = low;
			new->ml_size = boundary - low;
			if (head == NULL)
				head = new;
			if (tail) {
				tail->ml_next = new;
				new->ml_prev = tail;
			}
			tail = new;
			low = boundary;
		}
	}
	return (head);
}
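
/*
 * For illustration: OPL_MC_MEMBOARD_SHIFT is 38, so ssize above is
 * 1ULL << 38 = 256 GB. A single contiguous 512 GB chunk starting at
 * physical address 0 is therefore split by opl_memlist_per_board()
 * into two 256 GB memlist entries, one per board.
 */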

void
set_platform_cage_params(void)
{
	extern pgcnt_t total_pages;
	extern struct memlist *phys_avail;
	struct memlist *ml, *tml;

	if (kernel_cage_enable) {
		pgcnt_t preferred_cage_size;

		preferred_cage_size = MAX(opl_startup_cage_size,
		    total_pages / 256);

		ml = opl_memlist_per_board(phys_avail);

		/*
		 * Note: we are assuming that POST has loaded the whole
		 * show into the high end of memory. Having taken this
		 * leap, we copy the whole of phys_avail to the glist and
		 * arrange for the cage to grow downward (descending pfns).
		 */
		kcage_range_init(ml, KCAGE_DOWN, preferred_cage_size);

		/* free the memlist */
		do {
			tml = ml->ml_next;
			kmem_free(ml, sizeof (struct memlist));
			ml = tml;
		} while (ml != NULL);
	}

	if (kcage_on)
		cmn_err(CE_NOTE, "!DR Kernel Cage is ENABLED");
	else
		cmn_err(CE_NOTE, "!DR Kernel Cage is DISABLED");
}

/*ARGSUSED*/
int
plat_cpu_poweron(struct cpu *cp)
{
	int (*opl_cpu_poweron)(struct cpu *) = NULL;

	opl_cpu_poweron =
	    (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweron", 0);

	if (opl_cpu_poweron == NULL)
		return (ENOTSUP);
	else
		return ((opl_cpu_poweron)(cp));

}

/*ARGSUSED*/
int
plat_cpu_poweroff(struct cpu *cp)
{
	int (*opl_cpu_poweroff)(struct cpu *) = NULL;

	opl_cpu_poweroff =
	    (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweroff", 0);

	if (opl_cpu_poweroff == NULL)
		return (ENOTSUP);
	else
		return ((opl_cpu_poweroff)(cp));

}
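
/*
 * Note that plat_cpu_poweron() and plat_cpu_poweroff() above resolve
 * drmach_cpu_poweron/drmach_cpu_poweroff through kobj_getsymvalue() at call
 * time rather than linking against them directly, so CPU power control
 * degrades to ENOTSUP when the module providing those symbols (presumably
 * the platform DR support driver) is not loaded.
 */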

int
plat_max_boards(void)
{
	/*
	 * The IKKAKU model supports only one system board; for any other
	 * model, or if the model cannot be determined, return the max value.
	 */
	if ((opl_cur_model != NULL) && (opl_cur_model->model_type == IKKAKU))
		return (OPL_MAX_BOARDS_IKKAKU);
	else
		return (OPL_MAX_BOARDS);
}

int
plat_max_cpu_units_per_board(void)
{
	return (OPL_MAX_CPU_PER_BOARD);
}

int
plat_max_mem_units_per_board(void)
{
	return (OPL_MAX_MEM_UNITS_PER_BOARD);
}

int
plat_max_io_units_per_board(void)
{
	return (OPL_MAX_IO_UNITS_PER_BOARD);
}

int
plat_max_cmp_units_per_board(void)
{
	return (OPL_MAX_CMP_UNITS_PER_BOARD);
}

int
plat_max_core_units_per_board(void)
{
	return (OPL_MAX_CORE_UNITS_PER_BOARD);
}

int
plat_pfn_to_mem_node(pfn_t pfn)
{
	return (pfn >> mem_node_pfn_shift);
}

/* ARGSUSED */
void
plat_build_mem_nodes(prom_memlist_t *list, size_t nelems)
{
	size_t	elem;
	pfn_t	basepfn;
	pgcnt_t	npgs;
	uint64_t	boundary, ssize;
	uint64_t	low, high;

	/*
	 * OPL mem slices are always aligned on a 256GB boundary.
	 */
	mem_node_pfn_shift = OPL_MC_MEMBOARD_SHIFT - MMU_PAGESHIFT;
	mem_node_physalign = 0;

	/*
	 * Boot install lists are arranged <addr, len>, <addr, len>, ...
	 */
	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);
	for (elem = 0; elem < nelems; list++, elem++) {
		low = list->addr;
		high = low + list->size;
		while (low < high) {
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			basepfn = btop(low);
			npgs = btop(boundary - low);
			mem_node_add_slice(basepfn, basepfn + npgs - 1);
			low = boundary;
		}
	}
}
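
/*
 * For illustration: with the sun4u 8 KB base page size (MMU_PAGESHIFT of 13),
 * mem_node_pfn_shift above works out to 38 - 13 = 25, so
 * plat_pfn_to_mem_node() maps each 256 GB board slice (2^25 base pages)
 * to its own memnode.
 */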

/*
 * Associate the memory slice of a board with its memory node at boot time.
 */
void
plat_fill_mc(pnode_t nodeid)
{
	int board;
	int memnode;
	struct {
		uint64_t	addr;
		uint64_t	size;
	} mem_range;

	if (prom_getprop(nodeid, "board#", (caddr_t)&board) < 0) {
		panic("Can not find board# property in mc node %x", nodeid);
	}
	if (prom_getprop(nodeid, "sb-mem-ranges", (caddr_t)&mem_range) < 0) {
		panic("Can not find sb-mem-ranges property in mc node %x",
		    nodeid);
	}
	memnode = mem_range.addr >> OPL_MC_MEMBOARD_SHIFT;
	plat_assign_lgrphand_to_mem_node(board, memnode);
}

/*
 * Return the platform handle for the lgroup containing the given CPU
 *
 * For OPL, lgroup platform handle == board #.
 */

extern int mpo_disabled;
extern lgrp_handle_t lgrp_default_handle;

lgrp_handle_t
plat_lgrp_cpu_to_hand(processorid_t id)
{
	lgrp_handle_t plathand;

	/*
	 * Return the real platform handle for the CPU until
	 * such time as we know that MPO should be disabled.
	 * At that point, we set the "mpo_disabled" flag to true,
	 * and from that point on, return the default handle.
	 *
	 * By the time we know that MPO should be disabled, the
	 * first CPU will have already been added to a leaf
	 * lgroup, but that's ok. The common lgroup code will
	 * double check that the boot CPU is in the correct place,
	 * and in the case where mpo should be disabled, will move
	 * it to the root if necessary.
	 */
	if (mpo_disabled) {
		/* If MPO is disabled, return the default (UMA) handle */
		plathand = lgrp_default_handle;
	} else
		plathand = (lgrp_handle_t)LSB_ID(id);
	return (plathand);
}

/*
 * Platform specific lgroup initialization
 */
void
plat_lgrp_init(void)
{
	extern uint32_t lgrp_expand_proc_thresh;
	extern uint32_t lgrp_expand_proc_diff;
	const uint_t m = LGRP_LOADAVG_THREAD_MAX;

	/*
	 * Set tuneables for the OPL architecture
	 *
	 * lgrp_expand_proc_thresh is the threshold load on the set of
	 * lgroups a process is currently using before considering
	 * adding another lgroup to the set. For Oly-C and Jupiter
	 * systems, there are four sockets per lgroup. Setting
	 * lgrp_expand_proc_thresh to add lgroups when the load reaches
	 * four threads will spread the load when it exceeds one thread
	 * per socket, optimizing memory bandwidth and L2 cache space.
	 *
	 * lgrp_expand_proc_diff determines how much less another lgroup
	 * must be loaded before shifting the start location of a thread
	 * to it.
	 *
	 * lgrp_loadavg_tolerance is the threshold where two lgroups are
	 * considered to have different loads. It is set to be less than
	 * 1% so that even a small residual load will be considered different
	 * from no residual load.
	 *
	 * Note that loadavg values are not precise.
	 * Every 1/10 of a second loadavg values are reduced by 5%.
	 * This adjustment can come in the middle of the lgroup selection
	 * process, and for larger parallel apps with many threads can
	 * frequently occur between the start of the second thread
	 * placement and the finish of the last thread placement.
	 * We also must be careful to not use too small of a threshold
	 * since the cumulative decay for 1 second idle time is 40%.
	 * That is, the residual load from completed threads will still
	 * be 60% one second after the proc goes idle or 8% after 5 seconds.
	 *
	 * To allow for lag time in loadavg calculations
	 * remote thresh = 3.75 * LGRP_LOADAVG_THREAD_MAX
	 * local thresh  = 0.75 * LGRP_LOADAVG_THREAD_MAX
	 * tolerance     = 0.0078 * LGRP_LOADAVG_THREAD_MAX
	 *
	 * The load placement algorithms consider LGRP_LOADAVG_THREAD_MAX
	 * as the equivalent of a load of 1. To make the code more compact,
	 * we set m = LGRP_LOADAVG_THREAD_MAX.
	 */
	lgrp_expand_proc_thresh = (m * 3) + (m >> 1) + (m >> 2);
	lgrp_expand_proc_diff = (m >> 1) + (m >> 2);
	lgrp_loadavg_tolerance = (m >> 7);
}
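
/*
 * For reference, the shift expressions above implement the targets listed
 * in the comment: (m * 3) + (m >> 1) + (m >> 2) = 3.75 * m,
 * (m >> 1) + (m >> 2) = 0.75 * m, and (m >> 7) = m / 128, which is
 * approximately 0.0078 * m.
 */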

/*
 * Platform notification of lgroup (re)configuration changes
 */
/*ARGSUSED*/
void
plat_lgrp_config(lgrp_config_flag_t evt, uintptr_t arg)
{
	update_membounds_t *umb;
	lgrp_config_mem_rename_t lmr;
	int sbd, tbd;
	lgrp_handle_t hand, shand, thand;
	int mnode, snode, tnode;
	pfn_t start, end;

	if (mpo_disabled)
		return;

	switch (evt) {

	case LGRP_CONFIG_MEM_ADD:
		/*
		 * Establish the lgroup handle to memnode translation.
		 */
		umb = (update_membounds_t *)arg;

		hand = umb->u_board;
		mnode = plat_pfn_to_mem_node(umb->u_base >> MMU_PAGESHIFT);
		plat_assign_lgrphand_to_mem_node(hand, mnode);

		break;

	case LGRP_CONFIG_MEM_DEL:
		/*
		 * Special handling for possible memory holes.
		 */
		umb = (update_membounds_t *)arg;
		hand = umb->u_board;
		if ((mnode = plat_lgrphand_to_mem_node(hand)) != -1) {
			if (mem_node_config[mnode].exists) {
				start = mem_node_config[mnode].physbase;
				end = mem_node_config[mnode].physmax;
				mem_node_del_slice(start, end);
			}
		}

		break;

	case LGRP_CONFIG_MEM_RENAME:
		/*
		 * During a DR copy-rename operation, all of the memory
		 * on one board is moved to another board -- but the
		 * addresses/pfns and memnodes don't change. This means
		 * the memory has changed locations without changing identity.
		 *
		 * Source is where we are copying from and target is where we
		 * are copying to. After source memnode is copied to target
		 * memnode, the physical addresses of the target memnode are
		 * renamed to match what the source memnode had. Then target
		 * memnode can be removed and source memnode can take its
		 * place.
		 *
		 * To do this, swap the lgroup handle to memnode mappings for
		 * the boards, so target lgroup will have source memnode and
		 * source lgroup will have empty target memnode which is where
		 * its memory will go (if any is added to it later).
		 *
		 * Then source memnode needs to be removed from its lgroup
		 * and added to the target lgroup where the memory was living
		 * but under a different name/memnode. The memory was in the
		 * target memnode and now lives in the source memnode with
		 * different physical addresses even though it is the same
		 * memory.
		 */
		sbd = arg & 0xffff;
		tbd = (arg & 0xffff0000) >> 16;
		shand = sbd;
		thand = tbd;
		snode = plat_lgrphand_to_mem_node(shand);
		tnode = plat_lgrphand_to_mem_node(thand);

		/*
		 * Special handling for possible memory holes.
		 */
		if (tnode != -1 && mem_node_config[tnode].exists) {
			start = mem_node_config[tnode].physbase;
			end = mem_node_config[tnode].physmax;
			mem_node_del_slice(start, end);
		}

		plat_assign_lgrphand_to_mem_node(thand, snode);
		plat_assign_lgrphand_to_mem_node(shand, tnode);

		lmr.lmem_rename_from = shand;
		lmr.lmem_rename_to = thand;

		/*
		 * Remove source memnode of copy rename from its lgroup
		 * and add it to its new target lgroup
		 */
		lgrp_config(LGRP_CONFIG_MEM_RENAME, (uintptr_t)snode,
		    (uintptr_t)&lmr);

		break;

	default:
		break;
	}
}

/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system, cannot be used across platforms,
 * and may not reflect the actual latency. It is platform and implementation
 * specific, so platform gets to decide its value. It would be nice if the
 * number was at least proportional to make comparisons more meaningful though.
 * NOTE: The numbers below are supposed to be load latencies for uncached
 * memory divided by 10.
 *
 */
int
plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
{
	/*
	 * Return min remote latency when there are more than two lgroups
	 * (root and child) and getting latency between two different lgroups
	 * or root is involved
	 */
	if (lgrp_optimizations() && (from != to ||
	    from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE))
		return (42);
	else
		return (35);
}
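
/*
 * Per the NOTE above, the return values of 42 and 35 correspond to uncached
 * load latencies of roughly 420 and 350 (presumably nanoseconds) for the
 * remote and local cases respectively.
 */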

/*
 * Return platform handle for root lgroup
 */
lgrp_handle_t
plat_lgrp_root_hand(void)
{
	if (mpo_disabled)
		return (lgrp_default_handle);

	return (LGRP_DEFAULT_HANDLE);
}

/*ARGSUSED*/
void
plat_freelist_process(int mnode)
{
}

void
load_platform_drivers(void)
{
	(void) i_ddi_attach_pseudo_node("dr");
}

/*
 * No platform drivers on this platform
 */
char *platform_module_list[] = {
	(char *)0
};

/*ARGSUSED*/
void
plat_tod_fault(enum tod_fault_type tod_bad)
{
}

/*ARGSUSED*/
void
cpu_sgn_update(ushort_t sgn, uchar_t state, uchar_t sub_state, int cpuid)
{
	static void (*scf_panic_callback)(int);
	static void (*scf_shutdown_callback)(int);

	/*
	 * This is for notifying the SCF of a system panic/shutdown.
	 * In case of shutdown and panic, the SCF call back
	 * function should be called.
	 * <SCF call back functions>
	 *   scf_panic_callb()   : panicsys()->panic_quiesce_hw()
	 *   scf_shutdown_callb(): halt() or power_down() or reboot_machine()
	 * cpuid should be -1 and state should be SIGST_EXIT.
	 */
	if (state == SIGST_EXIT && cpuid == -1) {

		/*
		 * find the symbol for the SCF panic callback routine in driver
		 */
		if (scf_panic_callback == NULL)
			scf_panic_callback = (void (*)(int))
			    modgetsymvalue("scf_panic_callb", 0);
		if (scf_shutdown_callback == NULL)
			scf_shutdown_callback = (void (*)(int))
			    modgetsymvalue("scf_shutdown_callb", 0);

		switch (sub_state) {
		case SIGSUBST_PANIC:
			if (scf_panic_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_panic_callb not found\n");
				return;
			}
			scf_panic_callback(SIGSUBST_PANIC);
			break;

		case SIGSUBST_HALT:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_HALT);
			break;

		case SIGSUBST_ENVIRON:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_ENVIRON);
			break;

		case SIGSUBST_REBOOT:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_REBOOT);
			break;
		}
	}
}

/*ARGSUSED*/
int
plat_get_mem_unum(int synd_code, uint64_t flt_addr, int flt_bus_id,
    int flt_in_memory, ushort_t flt_status,
    char *buf, int buflen, int *lenp)
{
	/*
	 * Check whether it is a memory error.
	 */
	if (flt_in_memory) {
		if (opl_get_mem_unum != NULL) {
			return (opl_get_mem_unum(synd_code, flt_addr, buf,
			    buflen, lenp));
		} else {
			return (ENOTSUP);
		}
	} else {
		return (ENOTSUP);
	}
}

/*ARGSUSED*/
int
plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	int	ret = 0;
	int	sb;
	int	plen;

	sb = opl_get_physical_board(LSB_ID(cpuid));
	if (sb == -1) {
		return (ENXIO);
	}

	/*
	 * opl_cur_model is assigned here
	 */
	if (opl_cur_model == NULL) {
		set_model_info();

		/*
		 * if not matched, return
		 */
		if (opl_cur_model == NULL)
			return (ENODEV);
	}

	ASSERT((opl_cur_model - opl_models) == (opl_cur_model->model_type));

	switch (opl_cur_model->model_type) {
	case FF1:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_A",
		    CHIP_ID(cpuid) / 2);
		break;

	case FF2:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_B",
		    (CHIP_ID(cpuid) / 2) + (sb * 2));
		break;

	case DC1:
	case DC2:
	case DC3:
		plen = snprintf(buf, buflen, "/%s%02d/CPUM%d", "CMU", sb,
		    CHIP_ID(cpuid));
		break;

	case IKKAKU:
		plen = snprintf(buf, buflen, "/%s", "MBU_A");
		break;

	default:
		/* This should never happen */
		return (ENODEV);
	}

	if (plen >= buflen) {
		ret = ENOSPC;
	} else {
		if (lenp)
			*lenp = strlen(buf);
	}
	return (ret);
}
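
/*
 * For illustration, the format strings above yield unum strings such as
 * "/MBU_A/CPUM1" for an FF1 system (chip 2 or 3), or "/CMU03/CPUM2" for a
 * DC-class system with the CPU on physical board 3, chip 2.
 */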

void
plat_nodename_set(void)
{
	post_xscf_msg((char *)&utsname, sizeof (struct utsname));
}

caddr_t efcode_vaddr = NULL;

/*
 * Preallocate enough memory for fcode claims.
 */

caddr_t
efcode_alloc(caddr_t alloc_base)
{
	caddr_t efcode_alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
	    MMU_PAGESIZE);
	caddr_t vaddr;

	/*
	 * allocate the physical memory for the Oberon fcode.
	 */
	if ((vaddr = (caddr_t)BOP_ALLOC(bootops, efcode_alloc_base,
	    efcode_size, MMU_PAGESIZE)) == NULL)
		cmn_err(CE_PANIC, "Cannot allocate Efcode Memory");

	efcode_vaddr = vaddr;

	return (efcode_alloc_base + efcode_size);
}

caddr_t
plat_startup_memlist(caddr_t alloc_base)
{
	caddr_t tmp_alloc_base;

	tmp_alloc_base = efcode_alloc(alloc_base);
	tmp_alloc_base =
	    (caddr_t)roundup((uintptr_t)tmp_alloc_base, ecache_alignsize);
	return (tmp_alloc_base);
}

/* need to forward declare these */
static void plat_lock_delay(uint_t);

void
startup_platform(void)
{
	if (clock_tick_threshold == 0)
		clock_tick_threshold = OPL_CLOCK_TICK_THRESHOLD;
	if (clock_tick_ncpus == 0)
		clock_tick_ncpus = OPL_CLOCK_TICK_NCPUS;
	mutex_lock_delay = plat_lock_delay;
	mutex_cap_factor = OPL_BOFF_MAX_SCALE;
}

static uint_t
get_mmu_id(processorid_t cpuid)
{
	int pb = opl_get_physical_board(LSB_ID(cpuid));

	if (pb == -1) {
		cmn_err(CE_PANIC,
		    "opl_get_physical_board failed (cpu %d LSB %u)",
		    cpuid, LSB_ID(cpuid));
	}
	return (pb * OPL_MAX_COREID_PER_BOARD) + (CHIP_ID(cpuid) *
	    OPL_MAX_COREID_PER_CMP) + CORE_ID(cpuid);
}

void
plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
{
	int	impl;

	impl = cpunodes[cpuid].implementation;
	if (IS_OLYMPUS_C(impl) || IS_JUPITER(impl)) {
		info->mmu_idx = get_mmu_id(cpuid);
		info->mmu_nctxs = 8192;
	} else {
		cmn_err(CE_PANIC, "Unknown processor %d", impl);
	}
}

int
plat_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	if (opl_get_mem_sid == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_sid(unum, buf, buflen, lenp));
}

int
plat_get_mem_offset(uint64_t paddr, uint64_t *offp)
{
	if (opl_get_mem_offset == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_offset(paddr, offp));
}

int
plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	if (opl_get_mem_addr == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_addr(unum, sid, offset, addrp));
}

void
plat_lock_delay(uint_t backoff)
{
	int i;
	uint_t cnt, remcnt;
	int ctr;
	hrtime_t delay_start, rem_delay;
	/*
	 * Platform specific lock delay code for OPL
	 *
	 * Using staged linear increases in the delay.
	 * The sleep instruction is the preferred method of delay,
	 * but is too coarse a granularity for the initial backoff.
	 */

	if (backoff < 100) {
		/*
		 * If desired backoff is long enough,
		 * use sleep for most of it
		 */
		for (cnt = backoff;
		    cnt >= OPL_BOFF_SLEEP;
		    cnt -= OPL_BOFF_SLEEP) {
			cpu_smt_pause();
		}
		/*
		 * spin for small remainder of backoff
		 */
		for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
			mutex_delay_default();
		}
	} else {
		/* backoff is large. Fill it by sleeping */
		delay_start = gethrtime_waitfree();
		cnt = backoff / OPL_BOFF_SLEEP;
		/*
		 * use sleep instructions for delay
		 */
		for (i = 0; i < cnt; i++) {
			cpu_smt_pause();
		}

		/*
		 * Note: if the other strand executes a sleep instruction,
		 * then the sleep ends immediately with a minimum time of
		 * 42 clocks. We check gethrtime to ensure we have
		 * waited long enough. And we include both a short
		 * spin loop and a sleep for repeated delay times.
		 */

		rem_delay = gethrtime_waitfree() - delay_start;
		while (rem_delay < cnt * OPL_BOFF_TM) {
			remcnt = cnt - (rem_delay / OPL_BOFF_TM);
			for (i = 0; i < remcnt; i++) {
				cpu_smt_pause();
				for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
					mutex_delay_default();
				}
			}
			rem_delay = gethrtime_waitfree() - delay_start;
		}
	}
}

/*
 * The following code implements an asynchronous call to XSCF to set up the
 * domain node name.
 */

#define	FREE_MSG(m)		kmem_free((m), NM_LEN((m)->len))

/*
 * The following three macros define all the operations on the request
 * list used here, and hide the details of the list
 * implementation from the code.
 */
#define	PUSH(m) \
	{ \
		(m)->next = ctl_msg.head; \
		(m)->prev = NULL; \
		if ((m)->next != NULL) \
			(m)->next->prev = (m); \
		ctl_msg.head = (m); \
	}

#define	REMOVE(m) \
	{ \
		if ((m)->prev != NULL) \
			(m)->prev->next = (m)->next; \
		else \
			ctl_msg.head = (m)->next; \
		if ((m)->next != NULL) \
			(m)->next->prev = (m)->prev; \
	}

#define	FREE_THE_TAIL(head) \
	{ \
		nm_msg_t *n_msg, *m; \
		m = (head)->next; \
		(head)->next = NULL; \
		while (m != NULL) { \
			n_msg = m->next; \
			FREE_MSG(m); \
			m = n_msg; \
		} \
	}

#define	SCF_PUTINFO(f, s, p) \
	f(KEY_ESCF, 0x01, 0, s, p)

#define	PASS2XSCF(m, r)	((r = SCF_PUTINFO(ctl_msg.scf_service_function, \
	    (m)->len, (m)->data)) == 0)

/*
 * The value of the following macro loosely depends on the
 * value of the "device busy" timeout used in the SCF driver.
 * (See pass2xscf_thread()).
 */
#define	SCF_DEVBUSY_DELAY	10

/*
 * The default number of attempts to contact the scf driver
 * if we cannot fetch any information about the timeout value
 * it uses.
 */

#define	REPEATS		4

typedef struct nm_msg {
	struct nm_msg	*next;
	struct nm_msg	*prev;
	int		len;
	char		data[1];
} nm_msg_t;

#define	NM_LEN(len)		(sizeof (nm_msg_t) + (len) - 1)
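
/*
 * nm_msg_t ends in a one-element data[] array, so NM_LEN(len) above sizes
 * an allocation as sizeof (nm_msg_t) + len - 1, leaving room for len bytes
 * of payload immediately after the header fields.
 */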

static struct ctlmsg {
	nm_msg_t	*head;
	nm_msg_t	*now_serving;
	kmutex_t	nm_lock;
	kthread_t	*nmt;
	int		cnt;
	int (*scf_service_function)(uint32_t, uint8_t,
	    uint32_t, uint32_t, void *);
} ctl_msg;

static void
post_xscf_msg(char *dp, int len)
{
	nm_msg_t *msg;

	msg = (nm_msg_t *)kmem_zalloc(NM_LEN(len), KM_SLEEP);

	bcopy(dp, msg->data, len);
	msg->len = len;

	mutex_enter(&ctl_msg.nm_lock);
	if (ctl_msg.nmt == NULL) {
		ctl_msg.nmt = thread_create(NULL, 0, pass2xscf_thread,
		    NULL, 0, &p0, TS_RUN, minclsyspri);
	}

	PUSH(msg);
	ctl_msg.cnt++;
	mutex_exit(&ctl_msg.nm_lock);
}

static void
pass2xscf_thread()
{
	nm_msg_t *msg;
	int ret;
	uint_t i, msg_sent, xscf_driver_delay;
	static uint_t repeat_cnt;
	uint_t *scf_wait_cnt;

	mutex_enter(&ctl_msg.nm_lock);

	/*
	 * Find the address of the SCF put routine if it's not done yet.
	 */
	if (ctl_msg.scf_service_function == NULL) {
		if ((ctl_msg.scf_service_function =
		    (int (*)(uint32_t, uint8_t, uint32_t, uint32_t, void *))
		    modgetsymvalue("scf_service_putinfo", 0)) == NULL) {
			cmn_err(CE_NOTE, "pass2xscf_thread: "
			    "scf_service_putinfo not found\n");
			ctl_msg.nmt = NULL;
			mutex_exit(&ctl_msg.nm_lock);
			return;
		}
	}

	/*
	 * Calculate the number of attempts to connect to XSCF based on the
	 * scf driver delay (which is
	 * SCF_DEVBUSY_DELAY*scf_online_wait_rcnt seconds) and the value
	 * of xscf_connect_delay (the total number of seconds to wait
	 * until XSCF gets ready).
	 */
	if (repeat_cnt == 0) {
		if ((scf_wait_cnt =
		    (uint_t *)
		    modgetsymvalue("scf_online_wait_rcnt", 0)) == NULL) {
			repeat_cnt = REPEATS;
		} else {

			xscf_driver_delay = *scf_wait_cnt *
			    SCF_DEVBUSY_DELAY;
			repeat_cnt = (xscf_connect_delay/xscf_driver_delay) + 1;
		}
	}

	while (ctl_msg.cnt != 0) {

		/*
		 * Take the very last request from the queue,
		 */
		ctl_msg.now_serving = ctl_msg.head;
		ASSERT(ctl_msg.now_serving != NULL);

		/*
		 * and discard all the others if any.
		 */
		FREE_THE_TAIL(ctl_msg.now_serving);
		ctl_msg.cnt = 1;
		mutex_exit(&ctl_msg.nm_lock);

		/*
		 * Pass the name to XSCF. Note that we do not hold the
		 * mutex while we are doing this.
		 */
		msg_sent = 0;
		for (i = 0; i < repeat_cnt; i++) {
			if (PASS2XSCF(ctl_msg.now_serving, ret)) {
				msg_sent = 1;
				break;
			} else {
				if (ret != EBUSY) {
					cmn_err(CE_NOTE, "pass2xscf_thread:"
					    " unexpected return code"
					    " from scf_service_putinfo():"
					    " %d\n", ret);
				}
			}
		}

		if (msg_sent) {

			/*
			 * Remove the request from the list
			 */
			mutex_enter(&ctl_msg.nm_lock);
			msg = ctl_msg.now_serving;
			ctl_msg.now_serving = NULL;
			REMOVE(msg);
			ctl_msg.cnt--;
			mutex_exit(&ctl_msg.nm_lock);
			FREE_MSG(msg);
		} else {

			/*
			 * If any other requests arrived while we were
			 * trying to communicate with XSCF, we will drop
			 * this one and take the latest one. Otherwise
			 * we will try to pass this one again.
			 */
			cmn_err(CE_NOTE,
			    "pass2xscf_thread: "
			    "scf_service_putinfo "
			    "not responding\n");
		}
		mutex_enter(&ctl_msg.nm_lock);
	}

	/*
	 * The request queue is empty, exit.
	 */
	ctl_msg.nmt = NULL;
	mutex_exit(&ctl_msg.nm_lock);
}