/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/promif.h>
#include <sys/platform_module.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/machsystm.h>
#include <sys/bootconf.h>
#include <sys/nvpair.h>
#include <sys/kobj.h>
#include <sys/mem_cage.h>
#include <sys/opl.h>
#include <sys/scfd/scfostoescf.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/utsname.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/lgrp.h>
#include <sys/memnode.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
#include <sys/cpu.h>
#include <vm/vm_dep.h>
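
/*
 * These hooks default to NULL and are expected to be filled in at run time,
 * presumably by the OPL memory-controller support code; the plat_get_mem_*()
 * wrappers below return ENOTSUP for any hook that is still NULL.
 */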
int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
int (*opl_get_mem_addr)(char *unum, char *sid,
    uint64_t offset, uint64_t *paddr);

/* Memory for fcode claims.  16k times # maximum possible IO units */
#define	EFCODE_SIZE	(OPL_MAX_BOARDS * OPL_MAX_IO_UNITS_PER_BOARD * 0x4000)
int efcode_size = EFCODE_SIZE;

#define	OPL_MC_MEMBOARD_SHIFT 38	/* Boards on 256GB boundary */

/* Set the maximum number of boards for DR */
int opl_boards = OPL_MAX_BOARDS;

void sgn_update_all_cpus(ushort_t, uchar_t, uchar_t);

extern int tsb_lgrp_affinity;

int opl_tsb_spares = (OPL_MAX_BOARDS) * (OPL_MAX_PCICH_UNITS_PER_BOARD) *
	(OPL_MAX_TSBS_PER_PCICH);

pgcnt_t opl_startup_cage_size = 0;

/*
 * The length of the delay, in seconds, in communication with the XSCF
 * after which a warning message is logged.
 */
uint_t xscf_connect_delay = 60 * 15;

static	opl_model_info_t opl_models[] = {
	{ "FF1", OPL_MAX_BOARDS_FF1, FF1, STD_DISPATCH_TABLE },
	{ "FF2", OPL_MAX_BOARDS_FF2, FF2, STD_DISPATCH_TABLE },
	{ "DC1", OPL_MAX_BOARDS_DC1, DC1, STD_DISPATCH_TABLE },
	{ "DC2", OPL_MAX_BOARDS_DC2, DC2, EXT_DISPATCH_TABLE },
	{ "DC3", OPL_MAX_BOARDS_DC3, DC3, EXT_DISPATCH_TABLE },
	{ "IKKAKU", OPL_MAX_BOARDS_IKKAKU, IKKAKU, STD_DISPATCH_TABLE },
};
static	int	opl_num_models = sizeof (opl_models)/sizeof (opl_model_info_t);

/*
 * opl_cur_model
 */
static	opl_model_info_t *opl_cur_model = NULL;

static struct memlist *opl_memlist_per_board(struct memlist *ml);
static void post_xscf_msg(char *, int);
static void pass2xscf_thread();

/*
 * Note: the FF/DC out-of-order instruction engine takes only a
 * single cycle to execute each spin loop; for comparison, Panther
 * takes 6 cycles for the same loop.
 * OPL_BOFF_SPIN = base spin loop, roughly one memory reference time
 * OPL_BOFF_TM = approx nsec for OPL sleep instruction (1600 for OPL-C)
 * OPL_BOFF_SLEEP = approx number of SPIN iterations to equal one sleep
 * OPL_BOFF_MAX_SCALE - scaling factor for max backoff based on active cpus
 * Listed values are tuned for 2.15GHz to 2.64GHz systems.
 * Values may change for future systems.
 */
#define	OPL_BOFF_SPIN 7
#define	OPL_BOFF_SLEEP 4
#define	OPL_BOFF_TM 1600
#define	OPL_BOFF_MAX_SCALE 8
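
/*
 * For illustration: with OPL_BOFF_SLEEP == 4 and OPL_BOFF_TM == 1600,
 * a backoff argument of 400 to plat_lock_delay() below becomes
 * 400 / OPL_BOFF_SLEEP = 100 sleep iterations, after which the routine
 * keeps sleeping/spinning until at least 100 * OPL_BOFF_TM = 160000 ns
 * (~160 usec) have elapsed.
 */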

#define	OPL_CLOCK_TICK_THRESHOLD	128
#define	OPL_CLOCK_TICK_NCPUS		64

extern int	clock_tick_threshold;
extern int	clock_tick_ncpus;

int
set_platform_max_ncpus(void)
{
	return (OPL_MAX_CPU_PER_BOARD * OPL_MAX_BOARDS);
}

int
set_platform_tsb_spares(void)
{
	return (MIN(opl_tsb_spares, MAX_UPA));
}

static void
set_model_info()
{
	extern int ts_dispatch_extended;
	char	name[MAXSYSNAME];
	int	i;

	/*
	 * Get model name from the root node.
	 *
	 * We are using the prom device tree since, at this point,
	 * the Solaris device tree is not yet set up.
	 */
	(void) prom_getprop(prom_rootnode(), "model", (caddr_t)name);

	for (i = 0; i < opl_num_models; i++) {
		if (strncmp(name, opl_models[i].model_name, MAXSYSNAME) == 0) {
			opl_cur_model = &opl_models[i];
			break;
		}
	}

	/*
	 * If no model matched, it is an unknown model.
	 * Just return; it will default to the standard dispatch tables.
	 */
	if (i == opl_num_models)
		return;

	if ((opl_cur_model->model_cmds & EXT_DISPATCH_TABLE) &&
	    (ts_dispatch_extended == -1)) {
		/*
		 * Based on the platform model, select a dispatch table.
		 * Only DC2 and DC3 systems use the alternate/extended
		 * TS dispatch table.
		 * IKKAKU, FF1, FF2 and DC1 systems use the standard dispatch
		 * tables.
		 */
		ts_dispatch_extended = 1;
	}

}

static void
set_max_mmu_ctxdoms()
{
	extern uint_t	max_mmu_ctxdoms;
	int		max_boards;

	/*
	 * From the model, get the maximum number of boards
	 * supported and set the value accordingly. If the model
	 * could not be determined or recognized, we assume the max value.
	 */
	if (opl_cur_model == NULL)
		max_boards = OPL_MAX_BOARDS;
	else
		max_boards = opl_cur_model->model_max_boards;

	/*
	 * On OPL, cores and MMUs are one-to-one.
	 */
	max_mmu_ctxdoms = OPL_MAX_CORE_UNITS_PER_BOARD * max_boards;
}

#pragma weak mmu_init_large_pages

void
set_platform_defaults(void)
{
	extern char *tod_module_name;
	extern void cpu_sgn_update(ushort_t, uchar_t, uchar_t, int);
	extern void mmu_init_large_pages(size_t);

	/* Set the CPU signature function pointer */
	cpu_sgn_func = cpu_sgn_update;

	/* Set appropriate tod module for OPL platform */
	ASSERT(tod_module_name == NULL);
	tod_module_name = "todopl";

	if ((mmu_page_sizes == max_mmu_page_sizes) &&
	    (mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
		if (&mmu_init_large_pages)
			mmu_init_large_pages(mmu_ism_pagesize);
	}

	tsb_lgrp_affinity = 1;

	set_max_mmu_ctxdoms();
}

/*
 * Convert a logical board number to a physical one.
 */

#define	LSBPROP		"board#"
#define	PSBPROP		"physical-board#"

int
opl_get_physical_board(int id)
{
	dev_info_t	*root_dip, *dip = NULL;
	char		*dname = NULL;
	int		circ;

	pnode_t		pnode;
	char		pname[MAXSYSNAME] = {0};

	int		lsb_id;	/* Logical System Board ID */
	int		psb_id;	/* Physical System Board ID */


	/*
	 * This function is called at an early stage of boot, when the
	 * kernel device tree is not initialized yet, and also
	 * later on when the device tree is up. We want to try
	 * the fast track first.
	 */
	root_dip = ddi_root_node();
	if (root_dip) {
		/* Get from devinfo node */
		ndi_devi_enter(root_dip, &circ);
		for (dip = ddi_get_child(root_dip); dip;
		    dip = ddi_get_next_sibling(dip)) {

			dname = ddi_node_name(dip);
			if (strncmp(dname, "pseudo-mc", 9) != 0)
				continue;

			if ((lsb_id = (int)ddi_getprop(DDI_DEV_T_ANY, dip,
			    DDI_PROP_DONTPASS, LSBPROP, -1)) == -1)
				continue;

			if (id == lsb_id) {
				if ((psb_id = (int)ddi_getprop(DDI_DEV_T_ANY,
				    dip, DDI_PROP_DONTPASS, PSBPROP, -1))
				    == -1) {
					ndi_devi_exit(root_dip, circ);
					return (-1);
				} else {
					ndi_devi_exit(root_dip, circ);
					return (psb_id);
				}
			}
		}
		ndi_devi_exit(root_dip, circ);
	}

	/*
	 * Either we do not have the kernel device tree, or we did not
	 * find the node for some reason (say, the kernel device tree
	 * was modified); in that case, try the OBP tree.
	 */
	pnode = prom_rootnode();
	for (pnode = prom_childnode(pnode); pnode;
	    pnode = prom_nextnode(pnode)) {

		if ((prom_getprop(pnode, "name", (caddr_t)pname) == -1) ||
		    (strncmp(pname, "pseudo-mc", 9) != 0))
			continue;

		if (prom_getprop(pnode, LSBPROP, (caddr_t)&lsb_id) == -1)
			continue;

		if (id == lsb_id) {
			if (prom_getprop(pnode, PSBPROP,
			    (caddr_t)&psb_id) == -1) {
				return (-1);
			} else {
				return (psb_id);
			}
		}
	}

	return (-1);
}

/*
 * For OPL it's possible that memory from two or more successive boards
 * will be contiguous across the boards, and therefore represented as a
 * single chunk.
 * This function splits such chunks down the board boundaries.
 */
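/*
 * For example, a single 512 GB chunk starting at physical address 0 is
 * returned as two entries, [0, 256 GB) and [256 GB, 512 GB), since the
 * board size used below is 1ULL << OPL_MC_MEMBOARD_SHIFT == 256 GB.
 */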
static struct memlist *
opl_memlist_per_board(struct memlist *ml)
{
	uint64_t ssize, low, high, boundary;
	struct memlist *head, *tail, *new;

	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);

	head = tail = NULL;

	for (; ml; ml = ml->next) {
		low  = (uint64_t)ml->address;
		high = low+(uint64_t)(ml->size);
		while (low < high) {
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			new = kmem_zalloc(sizeof (struct memlist), KM_SLEEP);
			new->address = low;
			new->size = boundary - low;
			if (head == NULL)
				head = new;
			if (tail) {
				tail->next = new;
				new->prev = tail;
			}
			tail = new;
			low = boundary;
		}
	}
	return (head);
}
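
/*
 * Note that total_pages / 256 below sizes the preferred cage at 1/256th
 * (roughly 0.4%) of physical memory, unless opl_startup_cage_size asks
 * for more.
 */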
void
set_platform_cage_params(void)
{
	extern pgcnt_t total_pages;
	extern struct memlist *phys_avail;
	struct memlist *ml, *tml;

	if (kernel_cage_enable) {
		pgcnt_t preferred_cage_size;

		preferred_cage_size = MAX(opl_startup_cage_size,
		    total_pages / 256);

		ml = opl_memlist_per_board(phys_avail);

		/*
		 * Note: we are assuming that POST has loaded the whole
		 * show into the high end of memory. Having taken this
		 * leap, we copy the whole of phys_avail to the cage
		 * glist and arrange for the cage to grow downward
		 * (descending pfns).
		 */
		kcage_range_init(ml, KCAGE_DOWN, preferred_cage_size);

		/* free the memlist */
		do {
			tml = ml->next;
			kmem_free(ml, sizeof (struct memlist));
			ml = tml;
		} while (ml != NULL);
	}

	if (kcage_on)
		cmn_err(CE_NOTE, "!DR Kernel Cage is ENABLED");
	else
		cmn_err(CE_NOTE, "!DR Kernel Cage is DISABLED");
}

/*ARGSUSED*/
int
plat_cpu_poweron(struct cpu *cp)
{
	int (*opl_cpu_poweron)(struct cpu *) = NULL;

	opl_cpu_poweron =
	    (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweron", 0);

	if (opl_cpu_poweron == NULL)
		return (ENOTSUP);
	else
		return ((opl_cpu_poweron)(cp));

}

/*ARGSUSED*/
int
plat_cpu_poweroff(struct cpu *cp)
{
	int (*opl_cpu_poweroff)(struct cpu *) = NULL;

	opl_cpu_poweroff =
	    (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweroff", 0);

	if (opl_cpu_poweroff == NULL)
		return (ENOTSUP);
	else
		return ((opl_cpu_poweroff)(cp));

}

int
plat_max_boards(void)
{
	return (OPL_MAX_BOARDS);
}

int
plat_max_cpu_units_per_board(void)
{
	return (OPL_MAX_CPU_PER_BOARD);
}

int
plat_max_mem_units_per_board(void)
{
	return (OPL_MAX_MEM_UNITS_PER_BOARD);
}

int
plat_max_io_units_per_board(void)
{
	return (OPL_MAX_IO_UNITS_PER_BOARD);
}

int
plat_max_cmp_units_per_board(void)
{
	return (OPL_MAX_CMP_UNITS_PER_BOARD);
}

int
plat_max_core_units_per_board(void)
{
	return (OPL_MAX_CORE_UNITS_PER_BOARD);
}

int
plat_pfn_to_mem_node(pfn_t pfn)
{
	return (pfn >> mem_node_pfn_shift);
}

/* ARGSUSED */
void
plat_build_mem_nodes(prom_memlist_t *list, size_t nelems)
{
	size_t	elem;
	pfn_t	basepfn;
	pgcnt_t	npgs;
	uint64_t	boundary, ssize;
	uint64_t	low, high;

	/*
	 * OPL mem slices are always aligned on a 256GB boundary.
	 */
	mem_node_pfn_shift = OPL_MC_MEMBOARD_SHIFT - MMU_PAGESHIFT;
	mem_node_physalign = 0;

	/*
	 * Boot install lists are arranged <addr, len>, <addr, len>, ...
	 */
	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);
	for (elem = 0; elem < nelems; list++, elem++) {
		low  = list->addr;
		high = low + list->size;
		while (low < high) {
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			basepfn = btop(low);
			npgs = btop(boundary - low);
			mem_node_add_slice(basepfn, basepfn + npgs - 1);
			low = boundary;
		}
	}
}

/*
 * Find the CPU associated with a slice at boot-time.
 */
void
plat_fill_mc(pnode_t nodeid)
{
	int board;
	int memnode;
	struct {
		uint64_t	addr;
		uint64_t	size;
	} mem_range;

	if (prom_getprop(nodeid, "board#", (caddr_t)&board) < 0) {
		panic("Can not find board# property in mc node %x", nodeid);
	}
	if (prom_getprop(nodeid, "sb-mem-ranges", (caddr_t)&mem_range) < 0) {
		panic("Can not find sb-mem-ranges property in mc node %x",
		    nodeid);
	}
	memnode = mem_range.addr >> OPL_MC_MEMBOARD_SHIFT;
	plat_assign_lgrphand_to_mem_node(board, memnode);
}

/*
 * Return the platform handle for the lgroup containing the given CPU
 *
 * For OPL, lgroup platform handle == board #.
 */

extern int mpo_disabled;
extern lgrp_handle_t lgrp_default_handle;

lgrp_handle_t
plat_lgrp_cpu_to_hand(processorid_t id)
{
	lgrp_handle_t plathand;

	/*
	 * Return the real platform handle for the CPU until
	 * such time as we know that MPO should be disabled.
	 * At that point, we set the "mpo_disabled" flag to true,
	 * and from that point on, return the default handle.
	 *
	 * By the time we know that MPO should be disabled, the
	 * first CPU will have already been added to a leaf
	 * lgroup, but that's ok. The common lgroup code will
	 * double check that the boot CPU is in the correct place,
	 * and in the case where mpo should be disabled, will move
	 * it to the root if necessary.
	 */
	if (mpo_disabled) {
		/* If MPO is disabled, return the default (UMA) handle */
		plathand = lgrp_default_handle;
	} else
		plathand = (lgrp_handle_t)LSB_ID(id);
	return (plathand);
}

/*
 * Platform specific lgroup initialization
 */
void
plat_lgrp_init(void)
{
	extern uint32_t lgrp_expand_proc_thresh;
	extern uint32_t lgrp_expand_proc_diff;
	const uint_t m = LGRP_LOADAVG_THREAD_MAX;

	/*
	 * Set tuneables for the OPL architecture
	 *
	 * lgrp_expand_proc_thresh is the threshold load on the set of
	 * lgroups a process is currently using before considering
	 * adding another lgroup to the set.  For Oly-C and Jupiter
	 * systems, there are four sockets per lgroup. Setting
	 * lgrp_expand_proc_thresh to add lgroups when the load reaches
	 * four threads will spread the load when it exceeds one thread
	 * per socket, optimizing memory bandwidth and L2 cache space.
	 *
	 * lgrp_expand_proc_diff determines how much less another lgroup
	 * must be loaded before shifting the start location of a thread
	 * to it.
	 *
	 * lgrp_loadavg_tolerance is the threshold where two lgroups are
	 * considered to have different loads.  It is set to be less than
	 * 1% so that even a small residual load will be considered different
	 * from no residual load.
	 *
	 * We note loadavg values are not precise.
	 * Every 1/10 of a second loadavg values are reduced by 5%.
	 * This adjustment can come in the middle of the lgroup selection
	 * process, and for larger parallel apps with many threads can
	 * frequently occur between the start of the second thread
	 * placement and the finish of the last thread placement.
	 * We also must be careful to not use too small of a threshold,
	 * since the cumulative decay for 1 second idle time is 40%.
	 * That is, the residual load from completed threads will still
	 * be 60% one second after the proc goes idle or 8% after 5 seconds.
	 *
	 * To allow for lag time in loadavg calculations
	 *	remote thresh = 3.75 * LGRP_LOADAVG_THREAD_MAX
	 *	local thresh  = 0.75 * LGRP_LOADAVG_THREAD_MAX
	 *	tolerance = 0.0078 * LGRP_LOADAVG_THREAD_MAX
	 *
	 * The load placement algorithms consider LGRP_LOADAVG_THREAD_MAX
	 * as the equivalent of a load of 1. To make the code more compact,
	 * we set m = LGRP_LOADAVG_THREAD_MAX.
	 */
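	/*
	 * The shift expressions below implement the factors above:
	 * (m * 3) + (m >> 1) + (m >> 2) corresponds to 3.75 * m,
	 * (m >> 1) + (m >> 2) to 0.75 * m, and (m >> 7) is m / 128,
	 * i.e. roughly 0.0078 * m.
	 */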
	lgrp_expand_proc_thresh = (m * 3) + (m >> 1) + (m >> 2);
	lgrp_expand_proc_diff = (m >> 1) + (m >> 2);
	lgrp_loadavg_tolerance = (m >> 7);
}

/*
 * Platform notification of lgroup (re)configuration changes
 */
/*ARGSUSED*/
void
plat_lgrp_config(lgrp_config_flag_t evt, uintptr_t arg)
{
	update_membounds_t *umb;
	lgrp_config_mem_rename_t lmr;
	int sbd, tbd;
	lgrp_handle_t hand, shand, thand;
	int mnode, snode, tnode;
	pfn_t start, end;

	if (mpo_disabled)
		return;

	switch (evt) {

	case LGRP_CONFIG_MEM_ADD:
		/*
		 * Establish the lgroup handle to memnode translation.
		 */
		umb = (update_membounds_t *)arg;

		hand = umb->u_board;
		mnode = plat_pfn_to_mem_node(umb->u_base >> MMU_PAGESHIFT);
		plat_assign_lgrphand_to_mem_node(hand, mnode);

		break;

	case LGRP_CONFIG_MEM_DEL:
		/*
		 * Special handling for possible memory holes.
		 */
		umb = (update_membounds_t *)arg;
		hand = umb->u_board;
		if ((mnode = plat_lgrphand_to_mem_node(hand)) != -1) {
			if (mem_node_config[mnode].exists) {
				start = mem_node_config[mnode].physbase;
				end = mem_node_config[mnode].physmax;
				mem_node_pre_del_slice(start, end);
				mem_node_post_del_slice(start, end, 0);
			}
		}

		break;

	case LGRP_CONFIG_MEM_RENAME:
		/*
		 * During a DR copy-rename operation, all of the memory
		 * on one board is moved to another board -- but the
		 * addresses/pfns and memnodes don't change. This means
		 * the memory has changed locations without changing identity.
		 *
		 * Source is where we are copying from and target is where we
		 * are copying to.  After source memnode is copied to target
		 * memnode, the physical addresses of the target memnode are
		 * renamed to match what the source memnode had.  Then target
		 * memnode can be removed and source memnode can take its
		 * place.
		 *
		 * To do this, swap the lgroup handle to memnode mappings for
		 * the boards, so target lgroup will have source memnode and
		 * source lgroup will have empty target memnode which is where
		 * its memory will go (if any is added to it later).
		 *
		 * Then source memnode needs to be removed from its lgroup
		 * and added to the target lgroup where the memory was living
		 * but under a different name/memnode.  The memory was in the
		 * target memnode and now lives in the source memnode with
		 * different physical addresses even though it is the same
		 * memory.
		 */
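		/*
		 * arg packs the source board number in its low 16 bits
		 * and the target board number in bits 16-31, as extracted
		 * below.
		 */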
		sbd = arg & 0xffff;
		tbd = (arg & 0xffff0000) >> 16;
		shand = sbd;
		thand = tbd;
		snode = plat_lgrphand_to_mem_node(shand);
		tnode = plat_lgrphand_to_mem_node(thand);

		/*
		 * Special handling for possible memory holes.
		 */
		if (tnode != -1 && mem_node_config[tnode].exists) {
			start = mem_node_config[tnode].physbase;
			end = mem_node_config[tnode].physmax;
			mem_node_pre_del_slice(start, end);
			mem_node_post_del_slice(start, end, 0);
		}

		plat_assign_lgrphand_to_mem_node(thand, snode);
		plat_assign_lgrphand_to_mem_node(shand, tnode);

		lmr.lmem_rename_from = shand;
		lmr.lmem_rename_to = thand;

		/*
		 * Remove source memnode of copy rename from its lgroup
		 * and add it to its new target lgroup
		 */
		lgrp_config(LGRP_CONFIG_MEM_RENAME, (uintptr_t)snode,
		    (uintptr_t)&lmr);

		break;

	default:
		break;
	}
}

/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system; it cannot be used across platforms,
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so the platform gets to decide its value.  It would be nice if the
 * number was at least proportional to make comparisons more meaningful though.
 * NOTE: The numbers below are supposed to be load latencies for uncached
 * memory divided by 10.
 *
 */
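/*
 * Under that divide-by-10 convention, the return values below correspond
 * to nominal load latencies of about 420 ns (remote) and 350 ns (local).
 */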
int
plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
{
	/*
	 * Return min remote latency when there are more than two lgroups
	 * (root and child) and getting latency between two different lgroups
	 * or root is involved
	 */
	if (lgrp_optimizations() && (from != to ||
	    from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE))
		return (42);
	else
		return (35);
}

/*
 * Return platform handle for root lgroup
 */
lgrp_handle_t
plat_lgrp_root_hand(void)
{
	if (mpo_disabled)
		return (lgrp_default_handle);

	return (LGRP_DEFAULT_HANDLE);
}

/*ARGSUSED*/
void
plat_freelist_process(int mnode)
{
}

void
load_platform_drivers(void)
{
	(void) i_ddi_attach_pseudo_node("dr");
}

/*
 * No platform drivers on this platform
 */
char *platform_module_list[] = {
	(char *)0
};

/*ARGSUSED*/
void
plat_tod_fault(enum tod_fault_type tod_bad)
{
}

/*ARGSUSED*/
void
cpu_sgn_update(ushort_t sgn, uchar_t state, uchar_t sub_state, int cpuid)
{
	static void (*scf_panic_callback)(int);
	static void (*scf_shutdown_callback)(int);

	/*
	 * This is for notifying the SCF of a system panic/shutdown.
	 * In case of shutdown and panic, the SCF callback
	 * function should be called.
	 * <SCF callback functions>
	 *   scf_panic_callb()   : panicsys()->panic_quiesce_hw()
	 *   scf_shutdown_callb(): halt() or power_down() or reboot_machine()
	 * cpuid should be -1 and state should be SIGST_EXIT.
	 */
	if (state == SIGST_EXIT && cpuid == -1) {

		/*
		 * find the symbol for the SCF panic callback routine in driver
		 */
		if (scf_panic_callback == NULL)
			scf_panic_callback = (void (*)(int))
			    modgetsymvalue("scf_panic_callb", 0);
		if (scf_shutdown_callback == NULL)
			scf_shutdown_callback = (void (*)(int))
			    modgetsymvalue("scf_shutdown_callb", 0);

		switch (sub_state) {
		case SIGSUBST_PANIC:
			if (scf_panic_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_panic_callb not found\n");
				return;
			}
			scf_panic_callback(SIGSUBST_PANIC);
			break;

		case SIGSUBST_HALT:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_HALT);
			break;

		case SIGSUBST_ENVIRON:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_ENVIRON);
			break;

		case SIGSUBST_REBOOT:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_REBOOT);
			break;
		}
	}
}

/*ARGSUSED*/
int
plat_get_mem_unum(int synd_code, uint64_t flt_addr, int flt_bus_id,
    int flt_in_memory, ushort_t flt_status,
    char *buf, int buflen, int *lenp)
{
	/*
	 * Check if it's a memory error.
	 */
	if (flt_in_memory) {
		if (opl_get_mem_unum != NULL) {
			return (opl_get_mem_unum(synd_code, flt_addr, buf,
			    buflen, lenp));
		} else {
			return (ENOTSUP);
		}
	} else {
		return (ENOTSUP);
	}
}
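
/*
 * Examples of the unum formats built below (values are illustrative):
 * a DC-class system with physical board 2 and CHIP_ID(cpuid) == 1
 * produces "/CMU02/CPUM1", while an FF1 system with CHIP_ID(cpuid) == 2
 * produces "/MBU_A/CPUM1".
 */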
/*ARGSUSED*/
int
plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	int	ret = 0;
	int	sb;
	int	plen;

	sb = opl_get_physical_board(LSB_ID(cpuid));
	if (sb == -1) {
		return (ENXIO);
	}

	/*
	 * opl_cur_model is assigned here
	 */
	if (opl_cur_model == NULL) {
		set_model_info();

		/*
		 * if not matched, return
		 */
		if (opl_cur_model == NULL)
			return (ENODEV);
	}

	ASSERT((opl_cur_model - opl_models) == (opl_cur_model->model_type));

	switch (opl_cur_model->model_type) {
	case FF1:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_A",
		    CHIP_ID(cpuid) / 2);
		break;

	case FF2:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_B",
		    (CHIP_ID(cpuid) / 2) + (sb * 2));
		break;

	case DC1:
	case DC2:
	case DC3:
		plen = snprintf(buf, buflen, "/%s%02d/CPUM%d", "CMU", sb,
		    CHIP_ID(cpuid));
		break;

	case IKKAKU:
		plen = snprintf(buf, buflen, "/%s", "MBU_A");
		break;

	default:
		/* This should never happen */
		return (ENODEV);
	}

	if (plen >= buflen) {
		ret = ENOSPC;
	} else {
		if (lenp)
			*lenp = strlen(buf);
	}
	return (ret);
}

void
plat_nodename_set(void)
{
	post_xscf_msg((char *)&utsname, sizeof (struct utsname));
}

caddr_t	efcode_vaddr = NULL;

/*
 * Preallocate enough memory for fcode claims.
 */

caddr_t
efcode_alloc(caddr_t alloc_base)
{
	caddr_t efcode_alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
	    MMU_PAGESIZE);
	caddr_t vaddr;

	/*
	 * allocate the physical memory for the Oberon fcode.
	 */
	if ((vaddr = (caddr_t)BOP_ALLOC(bootops, efcode_alloc_base,
	    efcode_size, MMU_PAGESIZE)) == NULL)
		cmn_err(CE_PANIC, "Cannot allocate Efcode Memory");

	efcode_vaddr = vaddr;

	return (efcode_alloc_base + efcode_size);
}

caddr_t
plat_startup_memlist(caddr_t alloc_base)
{
	caddr_t tmp_alloc_base;

	tmp_alloc_base = efcode_alloc(alloc_base);
	tmp_alloc_base =
	    (caddr_t)roundup((uintptr_t)tmp_alloc_base, ecache_alignsize);
	return (tmp_alloc_base);
}

/* need to forward declare this */
static void plat_lock_delay(uint_t);

void
startup_platform(void)
{
	if (clock_tick_threshold == 0)
		clock_tick_threshold = OPL_CLOCK_TICK_THRESHOLD;
	if (clock_tick_ncpus == 0)
		clock_tick_ncpus = OPL_CLOCK_TICK_NCPUS;
	mutex_lock_delay = plat_lock_delay;
	mutex_cap_factor = OPL_BOFF_MAX_SCALE;
}
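
/*
 * Derive a platform-wide MMU index for a cpuid: each physical board
 * contributes OPL_MAX_COREID_PER_BOARD ids and each chip within a board
 * contributes OPL_MAX_COREID_PER_CMP, so every core (and therefore every
 * MMU) maps to a unique index.
 */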
static uint_t
get_mmu_id(processorid_t cpuid)
{
	int pb = opl_get_physical_board(LSB_ID(cpuid));

	if (pb == -1) {
		cmn_err(CE_PANIC,
		    "opl_get_physical_board failed (cpu %d LSB %u)",
		    cpuid, LSB_ID(cpuid));
	}
	return (pb * OPL_MAX_COREID_PER_BOARD) + (CHIP_ID(cpuid) *
	    OPL_MAX_COREID_PER_CMP) + CORE_ID(cpuid);
}

void
plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
{
	int	impl;

	impl = cpunodes[cpuid].implementation;
	if (IS_OLYMPUS_C(impl) || IS_JUPITER(impl)) {
		info->mmu_idx = get_mmu_id(cpuid);
		info->mmu_nctxs = 8192;
	} else {
		cmn_err(CE_PANIC, "Unknown processor %d", impl);
	}
}

int
plat_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	if (opl_get_mem_sid == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_sid(unum, buf, buflen, lenp));
}

int
plat_get_mem_offset(uint64_t paddr, uint64_t *offp)
{
	if (opl_get_mem_offset == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_offset(paddr, offp));
}

int
plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	if (opl_get_mem_addr == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_addr(unum, sid, offset, addrp));
}

void
plat_lock_delay(uint_t backoff)
{
	int i;
	uint_t cnt, remcnt;
	int ctr;
	hrtime_t delay_start, rem_delay;
	/*
	 * Platform specific lock delay code for OPL
	 *
	 * Using staged linear increases in the delay.
	 * The sleep instruction is the preferred method of delay,
	 * but is too coarse-grained for the initial backoff.
	 */

	if (backoff < 100) {
		/*
		 * If desired backoff is long enough,
		 * use sleep for most of it
		 */
		for (cnt = backoff;
		    cnt >= OPL_BOFF_SLEEP;
		    cnt -= OPL_BOFF_SLEEP) {
			cpu_smt_pause();
		}
		/*
		 * spin for small remainder of backoff
		 */
		for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
			mutex_delay_default();
		}
	} else {
		/* backoff is large.  Fill it by sleeping */
		delay_start = gethrtime_waitfree();
		cnt = backoff / OPL_BOFF_SLEEP;
		/*
		 * use sleep instructions for delay
		 */
		for (i = 0; i < cnt; i++) {
			cpu_smt_pause();
		}

		/*
		 * Note: if the other strand executes a sleep instruction,
		 * then the sleep ends immediately with a minimum time of
		 * 42 clocks.  We check gethrtime to ensure we have
		 * waited long enough.  And we include both a short
		 * spin loop and a sleep for repeated delay times.
		 */

		rem_delay = gethrtime_waitfree() - delay_start;
		while (rem_delay < cnt * OPL_BOFF_TM) {
			remcnt = cnt - (rem_delay / OPL_BOFF_TM);
			for (i = 0; i < remcnt; i++) {
				cpu_smt_pause();
				for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
					mutex_delay_default();
				}
			}
			rem_delay = gethrtime_waitfree() - delay_start;
		}
	}
}

/*
 * The following code implements an asynchronous call to the XSCF to set up
 * the domain node name.
 */

#define	FREE_MSG(m)		kmem_free((m), NM_LEN((m)->len))

/*
 * The following three macros define all the operations on the request
 * list we are using here, and hide the details of the list
 * implementation from the code.
 */
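/*
 * PUSH() inserts at the head, so ctl_msg.head always points at the most
 * recent request; pass2xscf_thread() serves that entry and discards any
 * older ones with FREE_THE_TAIL().
 */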
#define	PUSH(m)	\
{	\
	(m)->next = ctl_msg.head;	\
	(m)->prev = NULL;	\
	if ((m)->next != NULL)	\
		(m)->next->prev = (m);	\
	ctl_msg.head = (m);	\
}

#define	REMOVE(m)	\
{	\
	if ((m)->prev != NULL)	\
		(m)->prev->next = (m)->next;	\
	else	\
		ctl_msg.head = (m)->next;	\
	if ((m)->next != NULL)	\
		(m)->next->prev = (m)->prev;	\
}

#define	FREE_THE_TAIL(head)	\
{	\
	nm_msg_t *n_msg, *m;	\
	m = (head)->next;	\
	(head)->next = NULL;	\
	while (m != NULL) {	\
		n_msg = m->next;	\
		FREE_MSG(m);	\
		m = n_msg;	\
	}	\
}

#define	SCF_PUTINFO(f, s, p)	\
	f(KEY_ESCF, 0x01, 0, s, p)

#define	PASS2XSCF(m, r)	((r = SCF_PUTINFO(ctl_msg.scf_service_function, \
	(m)->len, (m)->data)) == 0)

/*
 * The value of the following macro loosely depends on the
 * value of the "device busy" timeout used in the SCF driver.
 * (See pass2xscf_thread()).
 */
#define	SCF_DEVBUSY_DELAY	10

/*
 * The default number of attempts to contact the scf driver
 * if we cannot fetch any information about the timeout value
 * it uses.
 */

#define	REPEATS		4

typedef struct nm_msg {
	struct nm_msg *next;
	struct nm_msg *prev;
	int len;
	char data[1];
} nm_msg_t;

#define	NM_LEN(len)		(sizeof (nm_msg_t) + (len) - 1)

static struct ctlmsg {
	nm_msg_t	*head;
	nm_msg_t	*now_serving;
	kmutex_t	nm_lock;
	kthread_t	*nmt;
	int		cnt;
	int (*scf_service_function)(uint32_t, uint8_t,
	    uint32_t, uint32_t, void *);
} ctl_msg;

static void
post_xscf_msg(char *dp, int len)
{
	nm_msg_t *msg;

	msg = (nm_msg_t *)kmem_zalloc(NM_LEN(len), KM_SLEEP);

	bcopy(dp, msg->data, len);
	msg->len = len;

	mutex_enter(&ctl_msg.nm_lock);
	if (ctl_msg.nmt == NULL) {
		ctl_msg.nmt = thread_create(NULL, 0, pass2xscf_thread,
		    NULL, 0, &p0, TS_RUN, minclsyspri);
	}

	PUSH(msg);
	ctl_msg.cnt++;
	mutex_exit(&ctl_msg.nm_lock);
}

static void
pass2xscf_thread()
{
	nm_msg_t *msg;
	int ret;
	uint_t i, msg_sent, xscf_driver_delay;
	static uint_t repeat_cnt;
	uint_t *scf_wait_cnt;

	mutex_enter(&ctl_msg.nm_lock);

	/*
	 * Find the address of the SCF put routine if it's not done yet.
	 */
	if (ctl_msg.scf_service_function == NULL) {
		if ((ctl_msg.scf_service_function =
		    (int (*)(uint32_t, uint8_t, uint32_t, uint32_t, void *))
		    modgetsymvalue("scf_service_putinfo", 0)) == NULL) {
			cmn_err(CE_NOTE, "pass2xscf_thread: "
			    "scf_service_putinfo not found\n");
			ctl_msg.nmt = NULL;
			mutex_exit(&ctl_msg.nm_lock);
			return;
		}
	}

	/*
	 * Calculate the number of attempts to connect to the XSCF based on
	 * the scf driver delay (which is
	 * SCF_DEVBUSY_DELAY*scf_online_wait_rcnt seconds) and the value
	 * of xscf_connect_delay (the total number of seconds to wait
	 * until the XSCF gets ready).
	 */
	if (repeat_cnt == 0) {
		if ((scf_wait_cnt =
		    (uint_t *)
		    modgetsymvalue("scf_online_wait_rcnt", 0)) == NULL) {
			repeat_cnt = REPEATS;
		} else {

			xscf_driver_delay = *scf_wait_cnt *
			    SCF_DEVBUSY_DELAY;
			repeat_cnt = (xscf_connect_delay/xscf_driver_delay) + 1;
		}
	}

	while (ctl_msg.cnt != 0) {

		/*
		 * Take the very last request from the queue,
		 */
		ctl_msg.now_serving = ctl_msg.head;
		ASSERT(ctl_msg.now_serving != NULL);

		/*
		 * and discard all the others if any.
		 */
		FREE_THE_TAIL(ctl_msg.now_serving);
		ctl_msg.cnt = 1;
		mutex_exit(&ctl_msg.nm_lock);

		/*
		 * Pass the name to the XSCF. Note that we do not hold the
		 * mutex while we are doing this.
		 */
		msg_sent = 0;
		for (i = 0; i < repeat_cnt; i++) {
			if (PASS2XSCF(ctl_msg.now_serving, ret)) {
				msg_sent = 1;
				break;
			} else {
				if (ret != EBUSY) {
					cmn_err(CE_NOTE, "pass2xscf_thread:"
					    " unexpected return code"
					    " from scf_service_putinfo():"
					    " %d\n", ret);
				}
			}
		}

		if (msg_sent) {

			/*
			 * Remove the request from the list
			 */
			mutex_enter(&ctl_msg.nm_lock);
			msg = ctl_msg.now_serving;
			ctl_msg.now_serving = NULL;
			REMOVE(msg);
			ctl_msg.cnt--;
			mutex_exit(&ctl_msg.nm_lock);
			FREE_MSG(msg);
		} else {

			/*
			 * If any other requests arrived while we were trying
			 * to communicate with the XSCF, we will drop this one
			 * and take the latest one.  Otherwise we will try to
			 * pass this one again.
			 */
			cmn_err(CE_NOTE,
			    "pass2xscf_thread: "
			    "scf_service_putinfo "
			    "not responding\n");
		}
		mutex_enter(&ctl_msg.nm_lock);
	}

	/*
	 * The request queue is empty, exit.
	 */
	ctl_msg.nmt = NULL;
	mutex_exit(&ctl_msg.nm_lock);
}