/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/promif.h>
#include <sys/platform_module.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/machsystm.h>
#include <sys/bootconf.h>
#include <sys/nvpair.h>
#include <sys/kobj.h>
#include <sys/mem_cage.h>
#include <sys/opl.h>
#include <sys/scfd/scfostoescf.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/utsname.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/lgrp.h>
#include <sys/memnode.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
#include <sys/cpu.h>
#include <sys/dumphdr.h>
#include <vm/vm_dep.h>

int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
int (*opl_get_mem_addr)(char *unum, char *sid,
    uint64_t offset, uint64_t *paddr);

/* Memory for fcode claims.  16k times the maximum possible number of IO units */
#define	EFCODE_SIZE	(OPL_MAX_BOARDS * OPL_MAX_IO_UNITS_PER_BOARD * 0x4000)
int efcode_size = EFCODE_SIZE;

#define	OPL_MC_MEMBOARD_SHIFT 38	/* Boards on 256GB boundary */

/* Set the maximum number of boards for DR */
int opl_boards = OPL_MAX_BOARDS;

void sgn_update_all_cpus(ushort_t, uchar_t, uchar_t);

extern int tsb_lgrp_affinity;

int opl_tsb_spares = (OPL_MAX_BOARDS) * (OPL_MAX_PCICH_UNITS_PER_BOARD) *
	(OPL_MAX_TSBS_PER_PCICH);

pgcnt_t opl_startup_cage_size = 0;

/*
 * The length of the delay, in seconds, in communication with the XSCF
 * after which a warning message is logged.
 */
uint_t xscf_connect_delay = 60 * 15;
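
/*
 * Table of the OPL models handled by this module.  Each entry gives the
 * model name as it appears in the PROM "model" property, the maximum
 * number of system boards for that model, the model type, and which TS
 * dispatch table (standard or extended) the model uses; see
 * set_model_info() below.
 */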
static opl_model_info_t opl_models[] = {
	{ "FF1", OPL_MAX_BOARDS_FF1, FF1, STD_DISPATCH_TABLE },
	{ "FF2", OPL_MAX_BOARDS_FF2, FF2, STD_DISPATCH_TABLE },
	{ "DC1", OPL_MAX_BOARDS_DC1, DC1, STD_DISPATCH_TABLE },
	{ "DC2", OPL_MAX_BOARDS_DC2, DC2, EXT_DISPATCH_TABLE },
	{ "DC3", OPL_MAX_BOARDS_DC3, DC3, EXT_DISPATCH_TABLE },
	{ "IKKAKU", OPL_MAX_BOARDS_IKKAKU, IKKAKU, STD_DISPATCH_TABLE },
};
static int opl_num_models = sizeof (opl_models)/sizeof (opl_model_info_t);

/*
 * opl_cur_model
 */
static opl_model_info_t *opl_cur_model = NULL;

static struct memlist *opl_memlist_per_board(struct memlist *ml);
static void post_xscf_msg(char *, int);
static void pass2xscf_thread();

/*
 * Note that the FF/DC out-of-order instruction engine takes only a
 * single cycle to execute each spin loop; for comparison, Panther takes
 * 6 cycles for the same loop.
 * OPL_BOFF_SPIN = base spin loop, roughly one memory reference time
 * OPL_BOFF_TM = approx nsec for OPL sleep instruction (1600 for OPL-C)
 * OPL_BOFF_SLEEP = approx number of SPIN iterations to equal one sleep
 * OPL_BOFF_MAX_SCALE = scaling factor for max backoff based on active cpus
 * Listed values are tuned for 2.15GHz to 2.64GHz systems.
 * Values may change for future systems.
 */
#define	OPL_BOFF_SPIN 7
#define	OPL_BOFF_SLEEP 4
#define	OPL_BOFF_TM 1600
#define	OPL_BOFF_MAX_SCALE 8

#define	OPL_CLOCK_TICK_THRESHOLD	128
#define	OPL_CLOCK_TICK_NCPUS		64

extern int clock_tick_threshold;
extern int clock_tick_ncpus;

int
set_platform_max_ncpus(void)
{
	return (OPL_MAX_CPU_PER_BOARD * OPL_MAX_BOARDS);
}

int
set_platform_tsb_spares(void)
{
	return (MIN(opl_tsb_spares, MAX_UPA));
}

static void
set_model_info()
{
	extern int ts_dispatch_extended;
	char	name[MAXSYSNAME];
	int	i;

	/*
	 * Get the model name from the root node.
	 *
	 * We are using the prom device tree since, at this point,
	 * the Solaris device tree is not yet set up.
	 */
	(void) prom_getprop(prom_rootnode(), "model", (caddr_t)name);

	for (i = 0; i < opl_num_models; i++) {
		if (strncmp(name, opl_models[i].model_name, MAXSYSNAME) == 0) {
			opl_cur_model = &opl_models[i];
			break;
		}
	}

	/*
	 * If the model was not matched, it is an unknown model.
	 * Just return; it will default to the standard dispatch tables.
	 */
	if (i == opl_num_models)
		return;

	if ((opl_cur_model->model_cmds & EXT_DISPATCH_TABLE) &&
	    (ts_dispatch_extended == -1)) {
		/*
		 * Based on the platform model, select a dispatch table.
		 * Only DC2 and DC3 systems use the alternate/extended
		 * TS dispatch table.
		 * IKKAKU, FF1, FF2 and DC1 systems use the standard
		 * dispatch tables.
		 */
		ts_dispatch_extended = 1;
	}

}

static void
set_max_mmu_ctxdoms()
{
	extern uint_t	max_mmu_ctxdoms;
	int		max_boards;

	/*
	 * From the model, get the maximum number of boards
	 * supported and set the value accordingly.  If the model
	 * could not be determined or recognized, we assume the max value.
	 */
	if (opl_cur_model == NULL)
		max_boards = OPL_MAX_BOARDS;
	else
		max_boards = opl_cur_model->model_max_boards;

	/*
	 * On OPL, cores and MMUs are one-to-one.
	 */
	max_mmu_ctxdoms = OPL_MAX_CORE_UNITS_PER_BOARD * max_boards;
}
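
/*
 * mmu_init_large_pages() is declared weak below so that
 * set_platform_defaults() can test its address before calling it; if the
 * routine is not linked into this configuration, the call is simply
 * skipped.
 */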
#pragma weak mmu_init_large_pages

void
set_platform_defaults(void)
{
	extern char *tod_module_name;
	extern void cpu_sgn_update(ushort_t, uchar_t, uchar_t, int);
	extern void mmu_init_large_pages(size_t);

	/* Set the CPU signature function pointer */
	cpu_sgn_func = cpu_sgn_update;

	/* Set the appropriate tod module for the OPL platform */
	ASSERT(tod_module_name == NULL);
	tod_module_name = "todopl";

	if ((mmu_page_sizes == max_mmu_page_sizes) &&
	    (mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
		if (&mmu_init_large_pages)
			mmu_init_large_pages(mmu_ism_pagesize);
	}

	tsb_lgrp_affinity = 1;

	set_max_mmu_ctxdoms();

	/* set the OPL threshold for compressed dumps */
	dump_plat_mincpu = DUMP_PLAT_SUN4U_OPL_MINCPU;
}

/*
 * Convert a logical board number to a physical one.
 */

#define	LSBPROP		"board#"
#define	PSBPROP		"physical-board#"

int
opl_get_physical_board(int id)
{
	dev_info_t	*root_dip, *dip = NULL;
	char		*dname = NULL;
	int		circ;

	pnode_t		pnode;
	char		pname[MAXSYSNAME] = {0};

	int		lsb_id;	/* Logical System Board ID */
	int		psb_id;	/* Physical System Board ID */


	/*
	 * This function is called at an early stage of boot, when the
	 * kernel device tree is not initialized yet, and also
	 * later on when the device tree is up.  We want to try
	 * the fast track first.
	 */
	root_dip = ddi_root_node();
	if (root_dip) {
		/* Get from the devinfo node */
		ndi_devi_enter(root_dip, &circ);
		for (dip = ddi_get_child(root_dip); dip;
		    dip = ddi_get_next_sibling(dip)) {

			dname = ddi_node_name(dip);
			if (strncmp(dname, "pseudo-mc", 9) != 0)
				continue;

			if ((lsb_id = (int)ddi_getprop(DDI_DEV_T_ANY, dip,
			    DDI_PROP_DONTPASS, LSBPROP, -1)) == -1)
				continue;

			if (id == lsb_id) {
				if ((psb_id = (int)ddi_getprop(DDI_DEV_T_ANY,
				    dip, DDI_PROP_DONTPASS, PSBPROP, -1))
				    == -1) {
					ndi_devi_exit(root_dip, circ);
					return (-1);
				} else {
					ndi_devi_exit(root_dip, circ);
					return (psb_id);
				}
			}
		}
		ndi_devi_exit(root_dip, circ);
	}

	/*
	 * Either we do not have the kernel device tree, or we did not
	 * find the node for some reason (say, the kernel device tree
	 * was modified); try the OBP tree.
	 */
	pnode = prom_rootnode();
	for (pnode = prom_childnode(pnode); pnode;
	    pnode = prom_nextnode(pnode)) {

		if ((prom_getprop(pnode, "name", (caddr_t)pname) == -1) ||
		    (strncmp(pname, "pseudo-mc", 9) != 0))
			continue;

		if (prom_getprop(pnode, LSBPROP, (caddr_t)&lsb_id) == -1)
			continue;

		if (id == lsb_id) {
			if (prom_getprop(pnode, PSBPROP,
			    (caddr_t)&psb_id) == -1) {
				return (-1);
			} else {
				return (psb_id);
			}
		}
	}

	return (-1);
}

/*
 * For OPL it's possible that memory from two or more successive boards
 * will be contiguous across the boards, and therefore represented as a
 * single chunk.
 * This function splits such chunks down the board boundaries.
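 *
 * For example (illustrative addresses only): with 256GB
 * (1ULL << OPL_MC_MEMBOARD_SHIFT) per board, a single chunk covering
 * [0x3f80000000, 0x4080000000) straddles the board boundary at
 * 0x4000000000 and is returned as two memlist entries, one per board.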
 */
static struct memlist *
opl_memlist_per_board(struct memlist *ml)
{
	uint64_t ssize, low, high, boundary;
	struct memlist *head, *tail, *new;

	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);

	head = tail = NULL;

	for (; ml; ml = ml->ml_next) {
		low = (uint64_t)ml->ml_address;
		high = low+(uint64_t)(ml->ml_size);
		while (low < high) {
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			new = kmem_zalloc(sizeof (struct memlist), KM_SLEEP);
			new->ml_address = low;
			new->ml_size = boundary - low;
			if (head == NULL)
				head = new;
			if (tail) {
				tail->ml_next = new;
				new->ml_prev = tail;
			}
			tail = new;
			low = boundary;
		}
	}
	return (head);
}

void
set_platform_cage_params(void)
{
	extern pgcnt_t total_pages;
	extern struct memlist *phys_avail;
	struct memlist *ml, *tml;

	if (kernel_cage_enable) {
		pgcnt_t preferred_cage_size;

		preferred_cage_size = MAX(opl_startup_cage_size,
		    total_pages / 256);

		ml = opl_memlist_per_board(phys_avail);

		/*
		 * Note: we are assuming that POST has loaded the
		 * whole show into the high end of memory.  Having
		 * taken this leap, we copy the whole of phys_avail
		 * to the glist and arrange for the cage to grow
		 * downward (descending pfns).
		 */
		kcage_range_init(ml, KCAGE_DOWN, preferred_cage_size);

		/* free the memlist */
		do {
			tml = ml->ml_next;
			kmem_free(ml, sizeof (struct memlist));
			ml = tml;
		} while (ml != NULL);
	}

	if (kcage_on)
		cmn_err(CE_NOTE, "!DR Kernel Cage is ENABLED");
	else
		cmn_err(CE_NOTE, "!DR Kernel Cage is DISABLED");
}

/*ARGSUSED*/
int
plat_cpu_poweron(struct cpu *cp)
{
	int (*opl_cpu_poweron)(struct cpu *) = NULL;

	opl_cpu_poweron =
	    (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweron", 0);

	if (opl_cpu_poweron == NULL)
		return (ENOTSUP);
	else
		return ((opl_cpu_poweron)(cp));

}

/*ARGSUSED*/
int
plat_cpu_poweroff(struct cpu *cp)
{
	int (*opl_cpu_poweroff)(struct cpu *) = NULL;

	opl_cpu_poweroff =
	    (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweroff", 0);

	if (opl_cpu_poweroff == NULL)
		return (ENOTSUP);
	else
		return ((opl_cpu_poweroff)(cp));

}

int
plat_max_boards(void)
{
	/*
	 * If the model cannot be determined, default to the max value.
	 * Otherwise, the IKKAKU model only supports one system board.
	 */
	if ((opl_cur_model != NULL) && (opl_cur_model->model_type == IKKAKU))
		return (OPL_MAX_BOARDS_IKKAKU);
	else
		return (OPL_MAX_BOARDS);
}

int
plat_max_cpu_units_per_board(void)
{
	return (OPL_MAX_CPU_PER_BOARD);
}

int
plat_max_mem_units_per_board(void)
{
	return (OPL_MAX_MEM_UNITS_PER_BOARD);
}

int
plat_max_io_units_per_board(void)
{
	return (OPL_MAX_IO_UNITS_PER_BOARD);
}

int
plat_max_cmp_units_per_board(void)
{
	return (OPL_MAX_CMP_UNITS_PER_BOARD);
}

int
plat_max_core_units_per_board(void)
{
	return (OPL_MAX_CORE_UNITS_PER_BOARD);
}

int
plat_pfn_to_mem_node(pfn_t pfn)
{
	return (pfn >> mem_node_pfn_shift);
}

/* ARGSUSED */
void
plat_build_mem_nodes(prom_memlist_t *list, size_t nelems)
{
	size_t	elem;
	pfn_t	basepfn;
	pgcnt_t	npgs;
	uint64_t	boundary, ssize;
	uint64_t	low, high;

	/*
	 * OPL mem slices are always aligned on a 256GB boundary.
	 */
	mem_node_pfn_shift = OPL_MC_MEMBOARD_SHIFT - MMU_PAGESHIFT;
	mem_node_physalign = 0;

	/*
	 * Boot install lists are arranged <addr, len>, <addr, len>, ...
	 */
	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);
	for (elem = 0; elem < nelems; list++, elem++) {
		low = list->addr;
		high = low + list->size;
		while (low < high) {
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			basepfn = btop(low);
			npgs = btop(boundary - low);
			mem_node_add_slice(basepfn, basepfn + npgs - 1);
			low = boundary;
		}
	}
}

/*
 * Find the CPU associated with a slice at boot-time.
 */
void
plat_fill_mc(pnode_t nodeid)
{
	int board;
	int memnode;
	struct {
		uint64_t	addr;
		uint64_t	size;
	} mem_range;

	if (prom_getprop(nodeid, "board#", (caddr_t)&board) < 0) {
		panic("Can not find board# property in mc node %x", nodeid);
	}
	if (prom_getprop(nodeid, "sb-mem-ranges", (caddr_t)&mem_range) < 0) {
		panic("Can not find sb-mem-ranges property in mc node %x",
		    nodeid);
	}
	memnode = mem_range.addr >> OPL_MC_MEMBOARD_SHIFT;
	plat_assign_lgrphand_to_mem_node(board, memnode);
}

/*
 * Return the platform handle for the lgroup containing the given CPU
 *
 * For OPL, lgroup platform handle == board #.
 */

extern int mpo_disabled;
extern lgrp_handle_t lgrp_default_handle;

lgrp_handle_t
plat_lgrp_cpu_to_hand(processorid_t id)
{
	lgrp_handle_t plathand;

	/*
	 * Return the real platform handle for the CPU until
	 * such time as we know that MPO should be disabled.
	 * At that point, we set the "mpo_disabled" flag to true,
	 * and from that point on, return the default handle.
	 *
	 * By the time we know that MPO should be disabled, the
	 * first CPU will have already been added to a leaf
	 * lgroup, but that's ok.  The common lgroup code will
	 * double check that the boot CPU is in the correct place,
	 * and in the case where mpo should be disabled, will move
	 * it to the root if necessary.
	 */
	if (mpo_disabled) {
		/* If MPO is disabled, return the default (UMA) handle */
		plathand = lgrp_default_handle;
	} else
		plathand = (lgrp_handle_t)LSB_ID(id);
	return (plathand);
}

/*
 * Platform specific lgroup initialization
 */
void
plat_lgrp_init(void)
{
	extern uint32_t lgrp_expand_proc_thresh;
	extern uint32_t lgrp_expand_proc_diff;
	const uint_t m = LGRP_LOADAVG_THREAD_MAX;

	/*
	 * Set tuneables for the OPL architecture
	 *
	 * lgrp_expand_proc_thresh is the threshold load on the set of
	 * lgroups a process is currently using before considering
	 * adding another lgroup to the set.  For Oly-C and Jupiter
	 * systems, there are four sockets per lgroup.  Setting
	 * lgrp_expand_proc_thresh to add lgroups when the load reaches
	 * four threads will spread the load when it exceeds one thread
	 * per socket, optimizing memory bandwidth and L2 cache space.
	 *
	 * lgrp_expand_proc_diff determines how much less another lgroup
	 * must be loaded before shifting the start location of a thread
	 * to it.
	 *
	 * lgrp_loadavg_tolerance is the threshold where two lgroups are
	 * considered to have different loads.  It is set to be less than
	 * 1% so that even a small residual load will be considered different
	 * from no residual load.
	 *
	 * Note that loadavg values are not precise.
	 * Every 1/10 of a second loadavg values are reduced by 5%.
	 * This adjustment can come in the middle of the lgroup selection
	 * process, and for larger parallel apps with many threads can
	 * frequently occur between the start of the second thread
	 * placement and the finish of the last thread placement.
	 * We also must be careful not to use too small a threshold,
	 * since the cumulative decay for 1 second of idle time is 40%.
	 * That is, the residual load from completed threads will still
	 * be 60% one second after the proc goes idle, or 8% after 5 seconds.
	 *
	 * To allow for lag time in loadavg calculations
	 *	remote thresh = 3.75 * LGRP_LOADAVG_THREAD_MAX
	 *	local thresh  = 0.75 * LGRP_LOADAVG_THREAD_MAX
	 *	tolerance     = 0.0078 * LGRP_LOADAVG_THREAD_MAX
	 *
	 * The load placement algorithms consider LGRP_LOADAVG_THREAD_MAX
	 * as the equivalent of a load of 1.  To make the code more compact,
	 * we set m = LGRP_LOADAVG_THREAD_MAX.
	 */
	lgrp_expand_proc_thresh = (m * 3) + (m >> 1) + (m >> 2);
	lgrp_expand_proc_diff = (m >> 1) + (m >> 2);
	lgrp_loadavg_tolerance = (m >> 7);
}
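
/*
 * For reference, with m = LGRP_LOADAVG_THREAD_MAX the assignments above
 * evaluate to
 *	lgrp_expand_proc_thresh = 3m + m/2 + m/4  = 3.75 * m
 *	lgrp_expand_proc_diff   = m/2 + m/4       = 0.75 * m
 *	lgrp_loadavg_tolerance  = m/128          ~= 0.0078 * m
 * which correspond to the remote thresh, local thresh and tolerance
 * figures quoted in the comment in plat_lgrp_init().
 */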
/*
 * Platform notification of lgroup (re)configuration changes
 */
/*ARGSUSED*/
void
plat_lgrp_config(lgrp_config_flag_t evt, uintptr_t arg)
{
	update_membounds_t *umb;
	lgrp_config_mem_rename_t lmr;
	int sbd, tbd;
	lgrp_handle_t hand, shand, thand;
	int mnode, snode, tnode;
	pfn_t start, end;

	if (mpo_disabled)
		return;

	switch (evt) {

	case LGRP_CONFIG_MEM_ADD:
		/*
		 * Establish the lgroup handle to memnode translation.
		 */
		umb = (update_membounds_t *)arg;

		hand = umb->u_board;
		mnode = plat_pfn_to_mem_node(umb->u_base >> MMU_PAGESHIFT);
		plat_assign_lgrphand_to_mem_node(hand, mnode);

		break;

	case LGRP_CONFIG_MEM_DEL:
		/*
		 * Special handling for possible memory holes.
		 */
		umb = (update_membounds_t *)arg;
		hand = umb->u_board;
		if ((mnode = plat_lgrphand_to_mem_node(hand)) != -1) {
			if (mem_node_config[mnode].exists) {
				start = mem_node_config[mnode].physbase;
				end = mem_node_config[mnode].physmax;
				mem_node_del_slice(start, end);
			}
		}

		break;

	case LGRP_CONFIG_MEM_RENAME:
		/*
		 * During a DR copy-rename operation, all of the memory
		 * on one board is moved to another board -- but the
		 * addresses/pfns and memnodes don't change.  This means
		 * the memory has changed locations without changing identity.
		 *
		 * Source is where we are copying from and target is where we
		 * are copying to.  After the source memnode is copied to the
		 * target memnode, the physical addresses of the target
		 * memnode are renamed to match what the source memnode had.
		 * Then the target memnode can be removed and the source
		 * memnode can take its place.
		 *
		 * To do this, swap the lgroup handle to memnode mappings for
		 * the boards, so the target lgroup will have the source
		 * memnode and the source lgroup will have the empty target
		 * memnode, which is where its memory will go (if any is
		 * added to it later).
		 *
		 * Then the source memnode needs to be removed from its lgroup
		 * and added to the target lgroup where the memory was living
		 * but under a different name/memnode.  The memory was in the
		 * target memnode and now lives in the source memnode with
		 * different physical addresses even though it is the same
		 * memory.
		 */
		sbd = arg & 0xffff;
		tbd = (arg & 0xffff0000) >> 16;
		shand = sbd;
		thand = tbd;
		snode = plat_lgrphand_to_mem_node(shand);
		tnode = plat_lgrphand_to_mem_node(thand);

		/*
		 * Special handling for possible memory holes.
		 */
		if (tnode != -1 && mem_node_config[tnode].exists) {
			start = mem_node_config[tnode].physbase;
			end = mem_node_config[tnode].physmax;
			mem_node_del_slice(start, end);
		}

		plat_assign_lgrphand_to_mem_node(thand, snode);
		plat_assign_lgrphand_to_mem_node(shand, tnode);

		lmr.lmem_rename_from = shand;
		lmr.lmem_rename_to = thand;

		/*
		 * Remove the source memnode of the copy rename from its
		 * lgroup and add it to its new target lgroup.
		 */
		lgrp_config(LGRP_CONFIG_MEM_RENAME, (uintptr_t)snode,
		    (uintptr_t)&lmr);

		break;

	default:
		break;
	}
}

/*
 * Return the latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system; it cannot be used across platforms,
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so the platform gets to decide its value.  It would be nice if
 * the number were at least proportional to make comparisons more meaningful.
 * NOTE: The numbers below are supposed to be load latencies for uncached
 * memory divided by 10.
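 * On that scale, the values used below (42 and 35) correspond to roughly
 * 420 and 350 nsec respectively, but they should only be compared with
 * one another.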
 *
 */
int
plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
{
	/*
	 * Return the minimum remote latency when there are more than two
	 * lgroups (root and child) and we are getting the latency between
	 * two different lgroups or the root is involved.
	 */
	if (lgrp_optimizations() && (from != to ||
	    from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE))
		return (42);
	else
		return (35);
}

/*
 * Return the platform handle for the root lgroup
 */
lgrp_handle_t
plat_lgrp_root_hand(void)
{
	if (mpo_disabled)
		return (lgrp_default_handle);

	return (LGRP_DEFAULT_HANDLE);
}

/*ARGSUSED*/
void
plat_freelist_process(int mnode)
{
}

void
load_platform_drivers(void)
{
	(void) i_ddi_attach_pseudo_node("dr");
}

/*
 * No platform drivers on this platform
 */
char *platform_module_list[] = {
	(char *)0
};

/*ARGSUSED*/
void
plat_tod_fault(enum tod_fault_type tod_bad)
{
}

/*ARGSUSED*/
void
cpu_sgn_update(ushort_t sgn, uchar_t state, uchar_t sub_state, int cpuid)
{
	static void (*scf_panic_callback)(int);
	static void (*scf_shutdown_callback)(int);

	/*
	 * This is for notifying the SCF of a system panic/shutdown.
	 * In the case of a shutdown or panic, the SCF callback
	 * function should be called.
	 * <SCF callback functions>
	 * scf_panic_callb()   : panicsys()->panic_quiesce_hw()
	 * scf_shutdown_callb(): halt() or power_down() or reboot_machine()
	 * cpuid should be -1 and state should be SIGST_EXIT.
	 */
	if (state == SIGST_EXIT && cpuid == -1) {

		/*
		 * Find the symbol for the SCF panic callback routine in the
		 * driver.
		 */
		if (scf_panic_callback == NULL)
			scf_panic_callback = (void (*)(int))
			    modgetsymvalue("scf_panic_callb", 0);
		if (scf_shutdown_callback == NULL)
			scf_shutdown_callback = (void (*)(int))
			    modgetsymvalue("scf_shutdown_callb", 0);

		switch (sub_state) {
		case SIGSUBST_PANIC:
			if (scf_panic_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_panic_callb not found\n");
				return;
			}
			scf_panic_callback(SIGSUBST_PANIC);
			break;

		case SIGSUBST_HALT:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_HALT);
			break;

		case SIGSUBST_ENVIRON:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_ENVIRON);
			break;

		case SIGSUBST_REBOOT:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_REBOOT);
			break;
		}
	}
}
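
/*
 * Resolve a memory fault address to a unum string.  The work is delegated
 * to the opl_get_mem_unum hook when one has been registered; otherwise
 * ENOTSUP is returned.
 */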
/*ARGSUSED*/
int
plat_get_mem_unum(int synd_code, uint64_t flt_addr, int flt_bus_id,
    int flt_in_memory, ushort_t flt_status,
    char *buf, int buflen, int *lenp)
{
	/*
	 * Check whether it is a memory error.
	 */
	if (flt_in_memory) {
		if (opl_get_mem_unum != NULL) {
			return (opl_get_mem_unum(synd_code, flt_addr, buf,
			    buflen, lenp));
		} else {
			return (ENOTSUP);
		}
	} else {
		return (ENOTSUP);
	}
}

/*ARGSUSED*/
int
plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	int	ret = 0;
	int	sb;
	int	plen;

	sb = opl_get_physical_board(LSB_ID(cpuid));
	if (sb == -1) {
		return (ENXIO);
	}

	/*
	 * opl_cur_model is assigned here
	 */
	if (opl_cur_model == NULL) {
		set_model_info();

		/*
		 * if the model is still not matched, return
		 */
		if (opl_cur_model == NULL)
			return (ENODEV);
	}

	ASSERT((opl_cur_model - opl_models) == (opl_cur_model->model_type));

	switch (opl_cur_model->model_type) {
	case FF1:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_A",
		    CHIP_ID(cpuid) / 2);
		break;

	case FF2:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_B",
		    (CHIP_ID(cpuid) / 2) + (sb * 2));
		break;

	case DC1:
	case DC2:
	case DC3:
		plen = snprintf(buf, buflen, "/%s%02d/CPUM%d", "CMU", sb,
		    CHIP_ID(cpuid));
		break;

	case IKKAKU:
		plen = snprintf(buf, buflen, "/%s", "MBU_A");
		break;

	default:
		/* This should never happen */
		return (ENODEV);
	}

	if (plen >= buflen) {
		ret = ENOSPC;
	} else {
		if (lenp)
			*lenp = strlen(buf);
	}
	return (ret);
}

void
plat_nodename_set(void)
{
	post_xscf_msg((char *)&utsname, sizeof (struct utsname));
}

caddr_t	efcode_vaddr = NULL;

/*
 * Preallocate enough memory for fcode claims.
 */

caddr_t
efcode_alloc(caddr_t alloc_base)
{
	caddr_t efcode_alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
	    MMU_PAGESIZE);
	caddr_t vaddr;

	/*
	 * allocate the physical memory for the Oberon fcode.
	 */
	if ((vaddr = (caddr_t)BOP_ALLOC(bootops, efcode_alloc_base,
	    efcode_size, MMU_PAGESIZE)) == NULL)
		cmn_err(CE_PANIC, "Cannot allocate Efcode Memory");

	efcode_vaddr = vaddr;

	return (efcode_alloc_base + efcode_size);
}

caddr_t
plat_startup_memlist(caddr_t alloc_base)
{
	caddr_t tmp_alloc_base;

	tmp_alloc_base = efcode_alloc(alloc_base);
	tmp_alloc_base =
	    (caddr_t)roundup((uintptr_t)tmp_alloc_base, ecache_alignsize);
	return (tmp_alloc_base);
}

/* need to forward declare these */
static void plat_lock_delay(uint_t);

void
startup_platform(void)
{
	if (clock_tick_threshold == 0)
		clock_tick_threshold = OPL_CLOCK_TICK_THRESHOLD;
	if (clock_tick_ncpus == 0)
		clock_tick_ncpus = OPL_CLOCK_TICK_NCPUS;
	mutex_lock_delay = plat_lock_delay;
	mutex_cap_factor = OPL_BOFF_MAX_SCALE;
}

static uint_t
get_mmu_id(processorid_t cpuid)
{
	int pb = opl_get_physical_board(LSB_ID(cpuid));

	if (pb == -1) {
		cmn_err(CE_PANIC,
		    "opl_get_physical_board failed (cpu %d LSB %u)",
		    cpuid, LSB_ID(cpuid));
	}
	return (pb * OPL_MAX_COREID_PER_BOARD) + (CHIP_ID(cpuid) *
	    OPL_MAX_COREID_PER_CMP) + CORE_ID(cpuid);
}

void
plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
{
	int	impl;

	impl = cpunodes[cpuid].implementation;
	if (IS_OLYMPUS_C(impl) || IS_JUPITER(impl)) {
		info->mmu_idx = get_mmu_id(cpuid);
		info->mmu_nctxs = 8192;
	} else {
		cmn_err(CE_PANIC, "Unknown processor %d", impl);
	}
}

int
plat_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	if (opl_get_mem_sid == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_sid(unum, buf, buflen, lenp));
}

int
plat_get_mem_offset(uint64_t paddr, uint64_t *offp)
{
	if (opl_get_mem_offset == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_offset(paddr, offp));
}

int
plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	if (opl_get_mem_addr == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_addr(unum, sid, offset, addrp));
}
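
/*
 * A rough sketch of the timing model used by plat_lock_delay() below,
 * based on the OPL_BOFF_* tuning comment near the top of this file
 * (approximate figures): a backoff of B is converted into about
 * B / OPL_BOFF_SLEEP sleep instructions of roughly OPL_BOFF_TM (1600)
 * nsec each, with short spin loops (multiples of OPL_BOFF_SPIN iterations)
 * covering any remainder and making up for early sleep wake-ups, which are
 * detected via gethrtime.
 */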
void
plat_lock_delay(uint_t backoff)
{
	int i;
	uint_t cnt, remcnt;
	int ctr;
	hrtime_t delay_start, rem_delay;
	/*
	 * Platform specific lock delay code for OPL
	 *
	 * Using staged linear increases in the delay.
	 * The sleep instruction is the preferred method of delay,
	 * but its granularity is too large for the initial backoff.
	 */

	if (backoff < 100) {
		/*
		 * If the desired backoff is long enough,
		 * use sleep for most of it.
		 */
		for (cnt = backoff;
		    cnt >= OPL_BOFF_SLEEP;
		    cnt -= OPL_BOFF_SLEEP) {
			cpu_smt_pause();
		}
		/*
		 * Spin for the small remainder of the backoff.
		 */
		for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
			mutex_delay_default();
		}
	} else {
		/* The backoff is large.  Fill it by sleeping. */
		delay_start = gethrtime_waitfree();
		cnt = backoff / OPL_BOFF_SLEEP;
		/*
		 * Use sleep instructions for the delay.
		 */
		for (i = 0; i < cnt; i++) {
			cpu_smt_pause();
		}

		/*
		 * Note: if the other strand executes a sleep instruction,
		 * then the sleep ends immediately with a minimum time of
		 * 42 clocks.  We check gethrtime to ensure we have
		 * waited long enough.  And we include both a short
		 * spin loop and a sleep for repeated delay times.
		 */

		rem_delay = gethrtime_waitfree() - delay_start;
		while (rem_delay < cnt * OPL_BOFF_TM) {
			remcnt = cnt - (rem_delay / OPL_BOFF_TM);
			for (i = 0; i < remcnt; i++) {
				cpu_smt_pause();
				for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
					mutex_delay_default();
				}
			}
			rem_delay = gethrtime_waitfree() - delay_start;
		}
	}
}

/*
 * The following code implements an asynchronous call to the XSCF to set up
 * the domain node name.
 */

#define	FREE_MSG(m)		kmem_free((m), NM_LEN((m)->len))

/*
 * The following three macros define all the operations on the request
 * list we are using here, and hide the details of the list
 * implementation from the code.
 */
#define	PUSH(m) \
	{ \
		(m)->next = ctl_msg.head; \
		(m)->prev = NULL; \
		if ((m)->next != NULL) \
			(m)->next->prev = (m); \
		ctl_msg.head = (m); \
	}

#define	REMOVE(m) \
	{ \
		if ((m)->prev != NULL) \
			(m)->prev->next = (m)->next; \
		else \
			ctl_msg.head = (m)->next; \
		if ((m)->next != NULL) \
			(m)->next->prev = (m)->prev; \
	}

#define	FREE_THE_TAIL(head) \
	{ \
		nm_msg_t *n_msg, *m; \
		m = (head)->next; \
		(head)->next = NULL; \
		while (m != NULL) { \
			n_msg = m->next; \
			FREE_MSG(m); \
			m = n_msg; \
		} \
	}

#define	SCF_PUTINFO(f, s, p) \
	f(KEY_ESCF, 0x01, 0, s, p)

#define	PASS2XSCF(m, r)	((r = SCF_PUTINFO(ctl_msg.scf_service_function, \
	    (m)->len, (m)->data)) == 0)

/*
 * The value of the following macro loosely depends on the
 * value of the "device busy" timeout used in the SCF driver.
 * (See pass2xscf_thread()).
 */
#define	SCF_DEVBUSY_DELAY	10

/*
 * The default number of attempts to contact the scf driver
 * if we cannot fetch any information about the timeout value
 * it uses.
 */

#define	REPEATS		4

typedef struct nm_msg {
	struct nm_msg	*next;
	struct nm_msg	*prev;
	int		len;
	char		data[1];
} nm_msg_t;

#define	NM_LEN(len)		(sizeof (nm_msg_t) + (len) - 1)

static struct ctlmsg {
	nm_msg_t	*head;
	nm_msg_t	*now_serving;
	kmutex_t	nm_lock;
	kthread_t	*nmt;
	int		cnt;
	int (*scf_service_function)(uint32_t, uint8_t,
	    uint32_t, uint32_t, void *);
} ctl_msg;

static void
post_xscf_msg(char *dp, int len)
{
	nm_msg_t *msg;

	msg = (nm_msg_t *)kmem_zalloc(NM_LEN(len), KM_SLEEP);

	bcopy(dp, msg->data, len);
	msg->len = len;

	mutex_enter(&ctl_msg.nm_lock);
	if (ctl_msg.nmt == NULL) {
		ctl_msg.nmt = thread_create(NULL, 0, pass2xscf_thread,
		    NULL, 0, &p0, TS_RUN, minclsyspri);
	}

	PUSH(msg);
	ctl_msg.cnt++;
	mutex_exit(&ctl_msg.nm_lock);
}
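
/*
 * Worker thread that drains the request list built by post_xscf_msg().
 * Only the most recently posted message is passed to the XSCF; any older
 * queued requests are discarded.  scf_service_putinfo() is retried up to
 * a count derived from the SCF driver's busy timeout, and unexpected
 * (non-EBUSY) errors are logged.
 */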
static void
pass2xscf_thread()
{
	nm_msg_t *msg;
	int ret;
	uint_t i, msg_sent, xscf_driver_delay;
	static uint_t repeat_cnt;
	uint_t *scf_wait_cnt;

	mutex_enter(&ctl_msg.nm_lock);

	/*
	 * Find the address of the SCF put routine if it's not done yet.
	 */
	if (ctl_msg.scf_service_function == NULL) {
		if ((ctl_msg.scf_service_function =
		    (int (*)(uint32_t, uint8_t, uint32_t, uint32_t, void *))
		    modgetsymvalue("scf_service_putinfo", 0)) == NULL) {
			cmn_err(CE_NOTE, "pass2xscf_thread: "
			    "scf_service_putinfo not found\n");
			ctl_msg.nmt = NULL;
			mutex_exit(&ctl_msg.nm_lock);
			return;
		}
	}

	/*
	 * Calculate the number of attempts to connect to the XSCF based on
	 * the scf driver delay (which is
	 * SCF_DEVBUSY_DELAY*scf_online_wait_rcnt seconds) and the value
	 * of xscf_connect_delay (the total number of seconds to wait
	 * until the XSCF gets ready).
	 */
	if (repeat_cnt == 0) {
		if ((scf_wait_cnt =
		    (uint_t *)
		    modgetsymvalue("scf_online_wait_rcnt", 0)) == NULL) {
			repeat_cnt = REPEATS;
		} else {

			xscf_driver_delay = *scf_wait_cnt *
			    SCF_DEVBUSY_DELAY;
			repeat_cnt = (xscf_connect_delay/xscf_driver_delay) + 1;
		}
	}

	while (ctl_msg.cnt != 0) {

		/*
		 * Take the very last request from the queue,
		 */
		ctl_msg.now_serving = ctl_msg.head;
		ASSERT(ctl_msg.now_serving != NULL);

		/*
		 * and discard all the others, if any.
		 */
		FREE_THE_TAIL(ctl_msg.now_serving);
		ctl_msg.cnt = 1;
		mutex_exit(&ctl_msg.nm_lock);

		/*
		 * Pass the name to the XSCF.  Note that we do not hold the
		 * mutex while we are doing this.
		 */
		msg_sent = 0;
		for (i = 0; i < repeat_cnt; i++) {
			if (PASS2XSCF(ctl_msg.now_serving, ret)) {
				msg_sent = 1;
				break;
			} else {
				if (ret != EBUSY) {
					cmn_err(CE_NOTE, "pass2xscf_thread:"
					    " unexpected return code"
					    " from scf_service_putinfo():"
					    " %d\n", ret);
				}
			}
		}

		if (msg_sent) {

			/*
			 * Remove the request from the list
			 */
			mutex_enter(&ctl_msg.nm_lock);
			msg = ctl_msg.now_serving;
			ctl_msg.now_serving = NULL;
			REMOVE(msg);
			ctl_msg.cnt--;
			mutex_exit(&ctl_msg.nm_lock);
			FREE_MSG(msg);
		} else {

			/*
			 * If any other requests arrived while we were trying
			 * to communicate with the XSCF, we will drop this one
			 * and take the latest one.  Otherwise we will try to
			 * pass this one again.
			 */
			cmn_err(CE_NOTE,
			    "pass2xscf_thread: "
			    "scf_service_putinfo "
			    "not responding\n");
		}
		mutex_enter(&ctl_msg.nm_lock);
	}

	/*
	 * The request queue is empty, exit.
	 */
	ctl_msg.nmt = NULL;
	mutex_exit(&ctl_msg.nm_lock);
}