/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/promif.h>
#include <sys/platform_module.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/machsystm.h>
#include <sys/bootconf.h>
#include <sys/nvpair.h>
#include <sys/kobj.h>
#include <sys/mem_cage.h>
#include <sys/opl.h>
#include <sys/scfd/scfostoescf.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/utsname.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/lgrp.h>
#include <sys/memnode.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
#include <sys/cpu.h>
#include <vm/vm_dep.h>

int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
int (*opl_get_mem_addr)(char *unum, char *sid,
    uint64_t offset, uint64_t *paddr);

/* Memory for fcode claims.  16k times # maximum possible IO units */
#define	EFCODE_SIZE	(OPL_MAX_BOARDS * OPL_MAX_IO_UNITS_PER_BOARD * 0x4000)
int efcode_size = EFCODE_SIZE;

#define	OPL_MC_MEMBOARD_SHIFT	38	/* Boards on 256GB boundary */

/* Set the maximum number of boards for DR */
int opl_boards = OPL_MAX_BOARDS;

void sgn_update_all_cpus(ushort_t, uchar_t, uchar_t);

extern int tsb_lgrp_affinity;

int opl_tsb_spares = (OPL_MAX_BOARDS) * (OPL_MAX_PCICH_UNITS_PER_BOARD) *
	(OPL_MAX_TSBS_PER_PCICH);

pgcnt_t opl_startup_cage_size = 0;

/*
 * The length of the delay in seconds in communication with XSCF after
 * which the warning message will be logged.
 */
uint_t	xscf_connect_delay = 60 * 15;

static	opl_model_info_t opl_models[] = {
	{ "FF1", OPL_MAX_BOARDS_FF1, FF1, STD_DISPATCH_TABLE },
	{ "FF2", OPL_MAX_BOARDS_FF2, FF2, STD_DISPATCH_TABLE },
	{ "DC1", OPL_MAX_BOARDS_DC1, DC1, STD_DISPATCH_TABLE },
	{ "DC2", OPL_MAX_BOARDS_DC2, DC2, EXT_DISPATCH_TABLE },
	{ "DC3", OPL_MAX_BOARDS_DC3, DC3, EXT_DISPATCH_TABLE },
};
static	int	opl_num_models = sizeof (opl_models)/sizeof (opl_model_info_t);

/*
 * opl_cur_model
 */
static	opl_model_info_t *opl_cur_model = NULL;

static struct memlist *opl_memlist_per_board(struct memlist *ml);
static void post_xscf_msg(char *, int);
static void pass2xscf_thread();

/*
 * Note: the FF/DC out-of-order instruction engine takes only a
 * single cycle to execute each spin loop; for comparison, Panther
 * takes 6 cycles for the same loop.
 * The OPL sleep instruction takes approximately 1500 nsec, so if
 * spin count = OPL_BOFF_SLEEP * OPL_BOFF_SPIN then the spin time
 * should be equal to OPL_BOFF_TM nsecs (e.g. 5 * 720 = 3600
 * single-cycle iterations, which is about 1500 ns at 2.4GHz).
 * The listed values are tuned for 2.15GHz to 2.4GHz systems and
 * may change for future systems.
 */
#define	OPL_BOFF_SPIN	720
#define	OPL_BOFF_BASE	1
#define	OPL_BOFF_SLEEP	5
#define	OPL_BOFF_CAP1	20
#define	OPL_BOFF_CAP2	60
#define	OPL_BOFF_MAX	(40 * OPL_BOFF_SLEEP)
#define	OPL_BOFF_TM	1500

int
set_platform_max_ncpus(void)
{
	return (OPL_MAX_CPU_PER_BOARD * OPL_MAX_BOARDS);
}

int
set_platform_tsb_spares(void)
{
	return (MIN(opl_tsb_spares, MAX_UPA));
}

static void
set_model_info()
{
	extern int ts_dispatch_extended;
	char	name[MAXSYSNAME];
	int	i;

	/*
	 * Get model name from the root node.
	 *
	 * We are using the prom device tree since, at this point,
	 * the Solaris device tree is not yet setup.
	 */
	(void) prom_getprop(prom_rootnode(), "model", (caddr_t)name);

	for (i = 0; i < opl_num_models; i++) {
		if (strncmp(name, opl_models[i].model_name, MAXSYSNAME) == 0) {
			opl_cur_model = &opl_models[i];
			break;
		}
	}

	if (i == opl_num_models)
		halt("No valid OPL model is found!");

	if ((opl_cur_model->model_cmds & EXT_DISPATCH_TABLE) &&
	    (ts_dispatch_extended == -1)) {
		/*
		 * Based on the platform model, select a dispatch table.
		 * Only DC2 and DC3 systems use the alternate/extended
		 * TS dispatch table.
		 * FF1, FF2 and DC1 systems use the standard dispatch table.
		 */
		ts_dispatch_extended = 1;
	}

}

static void
set_max_mmu_ctxdoms()
{
	extern uint_t	max_mmu_ctxdoms;
	int		max_boards;

	/*
	 * From the model, get the maximum number of boards
	 * supported and set the value accordingly.  If the model
	 * could not be determined or recognized, we assume the max value.
	 */
	if (opl_cur_model == NULL)
		max_boards = OPL_MAX_BOARDS;
	else
		max_boards = opl_cur_model->model_max_boards;

	/*
	 * On OPL, cores and MMUs are one-to-one.
	 */
	max_mmu_ctxdoms = OPL_MAX_CORE_UNITS_PER_BOARD * max_boards;
}

#pragma weak mmu_init_large_pages

void
set_platform_defaults(void)
{
	extern char *tod_module_name;
	extern void cpu_sgn_update(ushort_t, uchar_t, uchar_t, int);
	extern void mmu_init_large_pages(size_t);

	/* Set the CPU signature function pointer */
	cpu_sgn_func = cpu_sgn_update;

	/* Set appropriate tod module for OPL platform */
	ASSERT(tod_module_name == NULL);
	tod_module_name = "todopl";

	if ((mmu_page_sizes == max_mmu_page_sizes) &&
	    (mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
		if (&mmu_init_large_pages)
			mmu_init_large_pages(mmu_ism_pagesize);
	}

	tsb_lgrp_affinity = 1;

	set_max_mmu_ctxdoms();
}

/*
 * Convert a logical board number to a physical one.
 */

#define	LSBPROP		"board#"
#define	PSBPROP		"physical-board#"

int
opl_get_physical_board(int id)
{
	dev_info_t	*root_dip, *dip = NULL;
	char		*dname = NULL;
	int		circ;

	pnode_t		pnode;
	char		pname[MAXSYSNAME] = {0};

	int		lsb_id;	/* Logical System Board ID */
	int		psb_id;	/* Physical System Board ID */


	/*
	 * This function is called at an early stage of bootup when the
	 * kernel device tree is not initialized yet, and also
	 * later on when the device tree is up.  We want to try
	 * the fast track first.
	 */
	root_dip = ddi_root_node();
	if (root_dip) {
		/* Get from devinfo node */
		ndi_devi_enter(root_dip, &circ);
		for (dip = ddi_get_child(root_dip); dip;
		    dip = ddi_get_next_sibling(dip)) {

			dname = ddi_node_name(dip);
			if (strncmp(dname, "pseudo-mc", 9) != 0)
				continue;

			if ((lsb_id = (int)ddi_getprop(DDI_DEV_T_ANY, dip,
			    DDI_PROP_DONTPASS, LSBPROP, -1)) == -1)
				continue;

			if (id == lsb_id) {
				if ((psb_id = (int)ddi_getprop(DDI_DEV_T_ANY,
				    dip, DDI_PROP_DONTPASS, PSBPROP, -1))
				    == -1) {
					ndi_devi_exit(root_dip, circ);
					return (-1);
				} else {
					ndi_devi_exit(root_dip, circ);
					return (psb_id);
				}
			}
		}
		ndi_devi_exit(root_dip, circ);
	}

	/*
	 * We do not have the kernel device tree, or we did not
	 * find the node for some reason (let's say the kernel
	 * device tree was modified), so let's try the OBP tree.
	 */
	pnode = prom_rootnode();
	for (pnode = prom_childnode(pnode); pnode;
	    pnode = prom_nextnode(pnode)) {

		if ((prom_getprop(pnode, "name", (caddr_t)pname) == -1) ||
		    (strncmp(pname, "pseudo-mc", 9) != 0))
			continue;

		if (prom_getprop(pnode, LSBPROP, (caddr_t)&lsb_id) == -1)
			continue;

		if (id == lsb_id) {
			if (prom_getprop(pnode, PSBPROP,
			    (caddr_t)&psb_id) == -1) {
				return (-1);
			} else {
				return (psb_id);
			}
		}
	}

	return (-1);
}

/*
 * For OPL it's possible that memory from two or more successive boards
 * will be contiguous across the boards, and therefore represented as a
 * single chunk.
 * This function splits such chunks down the board boundaries.
 */
static struct memlist *
opl_memlist_per_board(struct memlist *ml)
{
	uint64_t ssize, low, high, boundary;
	struct memlist *head, *tail, *new;

	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);

	head = tail = NULL;

	for (; ml; ml = ml->next) {
		low = (uint64_t)ml->address;
		high = low+(uint64_t)(ml->size);
		while (low < high) {
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			new = kmem_zalloc(sizeof (struct memlist), KM_SLEEP);
			new->address = low;
			new->size = boundary - low;
			if (head == NULL)
				head = new;
			if (tail) {
				tail->next = new;
				new->prev = tail;
			}
			tail = new;
			low = boundary;
		}
	}
	return (head);
}

void
set_platform_cage_params(void)
{
	extern pgcnt_t total_pages;
	extern struct memlist *phys_avail;
	struct memlist *ml, *tml;

	if (kernel_cage_enable) {
		pgcnt_t preferred_cage_size;

		preferred_cage_size = MAX(opl_startup_cage_size,
		    total_pages / 256);

		ml = opl_memlist_per_board(phys_avail);

		/*
		 * Note: we are assuming that POST has loaded the
		 * whole show into the high end of memory.  Having
		 * taken this leap, we copy the whole of phys_avail
		 * to the list and arrange for the cage to grow
		 * downward (descending pfns).
		 */
		kcage_range_init(ml, KCAGE_DOWN, preferred_cage_size);

		/* free the memlist */
		do {
			tml = ml->next;
			kmem_free(ml, sizeof (struct memlist));
			ml = tml;
		} while (ml != NULL);
	}

	if (kcage_on)
		cmn_err(CE_NOTE, "!DR Kernel Cage is ENABLED");
	else
		cmn_err(CE_NOTE, "!DR Kernel Cage is DISABLED");
}

/*ARGSUSED*/
int
plat_cpu_poweron(struct cpu *cp)
{
	int (*opl_cpu_poweron)(struct cpu *) = NULL;

	opl_cpu_poweron =
	    (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweron", 0);

	if (opl_cpu_poweron == NULL)
		return (ENOTSUP);
	else
		return ((opl_cpu_poweron)(cp));

}

/*ARGSUSED*/
int
plat_cpu_poweroff(struct cpu *cp)
{
	int (*opl_cpu_poweroff)(struct cpu *) = NULL;

	opl_cpu_poweroff =
	    (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweroff", 0);

	if (opl_cpu_poweroff == NULL)
		return (ENOTSUP);
	else
		return ((opl_cpu_poweroff)(cp));

}

int
plat_max_boards(void)
{
	return (OPL_MAX_BOARDS);
}

int
plat_max_cpu_units_per_board(void)
{
	return (OPL_MAX_CPU_PER_BOARD);
}

int
plat_max_mem_units_per_board(void)
{
	return (OPL_MAX_MEM_UNITS_PER_BOARD);
}

int
plat_max_io_units_per_board(void)
{
	return (OPL_MAX_IO_UNITS_PER_BOARD);
}

int
plat_max_cmp_units_per_board(void)
{
	return (OPL_MAX_CMP_UNITS_PER_BOARD);
}

int
plat_max_core_units_per_board(void)
{
	return (OPL_MAX_CORE_UNITS_PER_BOARD);
}

int
plat_pfn_to_mem_node(pfn_t pfn)
{
	return (pfn >> mem_node_pfn_shift);
}

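/*
 * Worked example of the pfn-to-memnode mapping: with
 * OPL_MC_MEMBOARD_SHIFT == 38 and the sun4u 8K base page size
 * (MMU_PAGESHIFT == 13, assumed here), mem_node_pfn_shift set up in
 * plat_build_mem_nodes() below is 38 - 13 == 25, so each memnode
 * covers 2^25 pages == 256GB, i.e. exactly one board's memory slice.
 */
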
/* ARGSUSED */
void
plat_build_mem_nodes(u_longlong_t *list, size_t nelems)
{
	size_t	elem;
	pfn_t	basepfn;
	pgcnt_t	npgs;
	uint64_t	boundary, ssize;
	uint64_t	low, high;

	/*
	 * OPL mem slices are always aligned on a 256GB boundary.
	 */
	mem_node_pfn_shift = OPL_MC_MEMBOARD_SHIFT - MMU_PAGESHIFT;
	mem_node_physalign = 0;

	/*
	 * Boot install lists are arranged <addr, len>, <addr, len>, ...
	 */
	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);
	for (elem = 0; elem < nelems; elem += 2) {
		low = (uint64_t)list[elem];
		high = low+(uint64_t)(list[elem+1]);
		while (low < high) {
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			basepfn = btop(low);
			npgs = btop(boundary - low);
			mem_node_add_slice(basepfn, basepfn + npgs - 1);
			low = boundary;
		}
	}
}

/*
 * Find the CPU associated with a slice at boot-time.
 */
void
plat_fill_mc(pnode_t nodeid)
{
	int board;
	int memnode;
	struct {
		uint64_t	addr;
		uint64_t	size;
	} mem_range;

	if (prom_getprop(nodeid, "board#", (caddr_t)&board) < 0) {
		panic("Can not find board# property in mc node %x", nodeid);
	}
	if (prom_getprop(nodeid, "sb-mem-ranges", (caddr_t)&mem_range) < 0) {
		panic("Can not find sb-mem-ranges property in mc node %x",
		    nodeid);
	}
	memnode = mem_range.addr >> OPL_MC_MEMBOARD_SHIFT;
	plat_assign_lgrphand_to_mem_node(board, memnode);
}

/*
 * Return the platform handle for the lgroup containing the given CPU
 *
 * For OPL, lgroup platform handle == board #.
 */

extern int mpo_disabled;
extern lgrp_handle_t lgrp_default_handle;

lgrp_handle_t
plat_lgrp_cpu_to_hand(processorid_t id)
{
	lgrp_handle_t plathand;

	/*
	 * Return the real platform handle for the CPU until
	 * such time as we know that MPO should be disabled.
	 * At that point, we set the "mpo_disabled" flag to true,
	 * and from that point on, return the default handle.
	 *
	 * By the time we know that MPO should be disabled, the
	 * first CPU will have already been added to a leaf
	 * lgroup, but that's ok.  The common lgroup code will
	 * double check that the boot CPU is in the correct place,
	 * and in the case where mpo should be disabled, will move
	 * it to the root if necessary.
	 */
	if (mpo_disabled) {
		/* If MPO is disabled, return the default (UMA) handle */
		plathand = lgrp_default_handle;
	} else
		plathand = (lgrp_handle_t)LSB_ID(id);
	return (plathand);
}

/*
 * Platform specific lgroup initialization
 */
void
plat_lgrp_init(void)
{
	extern uint32_t lgrp_expand_proc_thresh;
	extern uint32_t lgrp_expand_proc_diff;

	/*
	 * Set tuneables for the OPL architecture
	 *
	 * lgrp_expand_proc_thresh is the minimum load on the lgroups
	 * this process is currently running on before considering
	 * expanding threads to another lgroup.
	 *
	 * lgrp_expand_proc_diff determines how much less the remote lgroup
	 * must be loaded before expanding to it.
	 *
	 * Since remote latencies can be costly, attempt to keep 3 threads
	 * within the same lgroup before expanding to the next lgroup.
	 */
	lgrp_expand_proc_thresh = LGRP_LOADAVG_THREAD_MAX * 3;
	lgrp_expand_proc_diff = LGRP_LOADAVG_THREAD_MAX;
}

/*
 * Platform notification of lgroup (re)configuration changes
 */
/*ARGSUSED*/
void
plat_lgrp_config(lgrp_config_flag_t evt, uintptr_t arg)
{
	update_membounds_t *umb;
	lgrp_config_mem_rename_t lmr;
	int sbd, tbd;
	lgrp_handle_t hand, shand, thand;
	int mnode, snode, tnode;
	pfn_t start, end;

	if (mpo_disabled)
		return;

	switch (evt) {

	case LGRP_CONFIG_MEM_ADD:
		/*
		 * Establish the lgroup handle to memnode translation.
		 */
		umb = (update_membounds_t *)arg;

		hand = umb->u_board;
		mnode = plat_pfn_to_mem_node(umb->u_base >> MMU_PAGESHIFT);
		plat_assign_lgrphand_to_mem_node(hand, mnode);

		break;

	case LGRP_CONFIG_MEM_DEL:
		/*
		 * Special handling for possible memory holes.
		 */
		umb = (update_membounds_t *)arg;
		hand = umb->u_board;
		if ((mnode = plat_lgrphand_to_mem_node(hand)) != -1) {
			if (mem_node_config[mnode].exists) {
				start = mem_node_config[mnode].physbase;
				end = mem_node_config[mnode].physmax;
				mem_node_pre_del_slice(start, end);
				mem_node_post_del_slice(start, end, 0);
			}
		}

		break;

	case LGRP_CONFIG_MEM_RENAME:
		/*
		 * During a DR copy-rename operation, all of the memory
		 * on one board is moved to another board -- but the
		 * addresses/pfns and memnodes don't change. This means
		 * the memory has changed locations without changing identity.
		 *
		 * Source is where we are copying from and target is where we
		 * are copying to.  After source memnode is copied to target
		 * memnode, the physical addresses of the target memnode are
		 * renamed to match what the source memnode had.  Then target
		 * memnode can be removed and source memnode can take its
		 * place.
		 *
		 * To do this, swap the lgroup handle to memnode mappings for
		 * the boards, so target lgroup will have source memnode and
		 * source lgroup will have empty target memnode which is where
		 * its memory will go (if any is added to it later).
		 *
		 * Then source memnode needs to be removed from its lgroup
		 * and added to the target lgroup where the memory was living
		 * but under a different name/memnode.  The memory was in the
		 * target memnode and now lives in the source memnode with
		 * different physical addresses even though it is the same
		 * memory.
		 */
		sbd = arg & 0xffff;
		tbd = (arg & 0xffff0000) >> 16;
		shand = sbd;
		thand = tbd;
		snode = plat_lgrphand_to_mem_node(shand);
		tnode = plat_lgrphand_to_mem_node(thand);

		/*
		 * Special handling for possible memory holes.
		 */
		if (tnode != -1 && mem_node_config[tnode].exists) {
			start = mem_node_config[tnode].physbase;
			end = mem_node_config[tnode].physmax;
			mem_node_pre_del_slice(start, end);
			mem_node_post_del_slice(start, end, 0);
		}

		plat_assign_lgrphand_to_mem_node(thand, snode);
		plat_assign_lgrphand_to_mem_node(shand, tnode);

		lmr.lmem_rename_from = shand;
		lmr.lmem_rename_to = thand;

		/*
		 * Remove source memnode of copy rename from its lgroup
		 * and add it to its new target lgroup
		 */
		lgrp_config(LGRP_CONFIG_MEM_RENAME, (uintptr_t)snode,
		    (uintptr_t)&lmr);

		break;

	default:
		break;
	}
}

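/*
 * For reference, the LGRP_CONFIG_MEM_RENAME argument handled above packs
 * the source board into the low 16 bits and the target board into the
 * next 16 bits; e.g. arg == 0x00020005 encodes source board 5 and target
 * board 2.
 */
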
/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system, cannot be used across platforms,
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so the platform gets to decide its value.  It would be nice if the
 * number was at least proportional to make comparisons more meaningful though.
 * NOTE: The numbers below are supposed to be load latencies for uncached
 * memory divided by 10.
 *
 */
int
plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
{
	/*
	 * Return min remote latency when there are more than two lgroups
	 * (root and child) and getting latency between two different lgroups
	 * or root is involved
	 */
	if (lgrp_optimizations() && (from != to ||
	    from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE))
		return (42);
	else
		return (35);
}

/*
 * Return platform handle for root lgroup
 */
lgrp_handle_t
plat_lgrp_root_hand(void)
{
	if (mpo_disabled)
		return (lgrp_default_handle);

	return (LGRP_DEFAULT_HANDLE);
}

/*ARGSUSED*/
void
plat_freelist_process(int mnode)
{
}

void
load_platform_drivers(void)
{
	(void) i_ddi_attach_pseudo_node("dr");
}

/*
 * No platform drivers on this platform
 */
char *platform_module_list[] = {
	(char *)0
};

/*ARGSUSED*/
void
plat_tod_fault(enum tod_fault_type tod_bad)
{
}

/*ARGSUSED*/
void
cpu_sgn_update(ushort_t sgn, uchar_t state, uchar_t sub_state, int cpuid)
{
	static void (*scf_panic_callback)(int);
	static void (*scf_shutdown_callback)(int);

	/*
	 * This is for notifying the SCF of a system panic/shutdown.
	 * In the case of a shutdown or panic, the SCF callback
	 * function should be called.
	 * <SCF callback functions>
	 *   scf_panic_callb()   : panicsys()->panic_quiesce_hw()
	 *   scf_shutdown_callb(): halt() or power_down() or reboot_machine()
	 * cpuid should be -1 and state should be SIGST_EXIT.
	 */
	if (state == SIGST_EXIT && cpuid == -1) {

		/*
		 * find the symbol for the SCF panic callback routine in driver
		 */
		if (scf_panic_callback == NULL)
			scf_panic_callback = (void (*)(int))
			    modgetsymvalue("scf_panic_callb", 0);
		if (scf_shutdown_callback == NULL)
			scf_shutdown_callback = (void (*)(int))
			    modgetsymvalue("scf_shutdown_callb", 0);

		switch (sub_state) {
		case SIGSUBST_PANIC:
			if (scf_panic_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_panic_callb not found\n");
				return;
			}
			scf_panic_callback(SIGSUBST_PANIC);
			break;

		case SIGSUBST_HALT:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_HALT);
			break;

		case SIGSUBST_ENVIRON:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_ENVIRON);
			break;

		case SIGSUBST_REBOOT:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_REBOOT);
			break;
		}
	}
}

/*ARGSUSED*/
int
plat_get_mem_unum(int synd_code, uint64_t flt_addr, int flt_bus_id,
    int flt_in_memory, ushort_t flt_status,
    char *buf, int buflen, int *lenp)
{
	/*
	 * Check if it's a memory error.
	 */
	if (flt_in_memory) {
		if (opl_get_mem_unum != NULL) {
			return (opl_get_mem_unum(synd_code, flt_addr, buf,
			    buflen, lenp));
		} else {
			return (ENOTSUP);
		}
	} else {
		return (ENOTSUP);
	}
}

/*ARGSUSED*/
int
plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	int	ret = 0;
	int	sb;
	int	plen;

	sb = opl_get_physical_board(LSB_ID(cpuid));
	if (sb == -1) {
		return (ENXIO);
	}

	/*
	 * opl_cur_model is assigned here
	 */
	if (opl_cur_model == NULL) {
		set_model_info();
	}

	ASSERT((opl_cur_model - opl_models) == (opl_cur_model->model_type));

	switch (opl_cur_model->model_type) {
	case FF1:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_A",
		    CHIP_ID(cpuid) / 2);
		break;

	case FF2:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_B",
		    (CHIP_ID(cpuid) / 2) + (sb * 2));
		break;

	case DC1:
	case DC2:
	case DC3:
		plen = snprintf(buf, buflen, "/%s%02d/CPUM%d", "CMU", sb,
		    CHIP_ID(cpuid));
		break;

	default:
		/* This should never happen */
		return (ENODEV);
	}

	if (plen >= buflen) {
		ret = ENOSPC;
	} else {
		if (lenp)
			*lenp = strlen(buf);
	}
	return (ret);
}

void
plat_nodename_set(void)
{
	post_xscf_msg((char *)&utsname, sizeof (struct utsname));
}

caddr_t efcode_vaddr = NULL;

/*
 * Preallocate enough memory for fcode claims.
 */

caddr_t
efcode_alloc(caddr_t alloc_base)
{
	caddr_t efcode_alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
	    MMU_PAGESIZE);
	caddr_t vaddr;

	/*
	 * allocate the physical memory for the Oberon fcode.
	 */
	if ((vaddr = (caddr_t)BOP_ALLOC(bootops, efcode_alloc_base,
	    efcode_size, MMU_PAGESIZE)) == NULL)
		cmn_err(CE_PANIC, "Cannot allocate Efcode Memory");

	efcode_vaddr = vaddr;

	return (efcode_alloc_base + efcode_size);
}

caddr_t
plat_startup_memlist(caddr_t alloc_base)
{
	caddr_t tmp_alloc_base;

	tmp_alloc_base = efcode_alloc(alloc_base);
	tmp_alloc_base =
	    (caddr_t)roundup((uintptr_t)tmp_alloc_base, ecache_alignsize);
	return (tmp_alloc_base);
}

void
startup_platform(void)
{
}

void
plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
{
	int	impl;

	impl = cpunodes[cpuid].implementation;
	if (IS_OLYMPUS_C(impl) || IS_JUPITER(impl)) {
		info->mmu_idx = MMU_ID(cpuid);
		info->mmu_nctxs = 8192;
	} else {
		cmn_err(CE_PANIC, "Unknown processor %d", impl);
	}
}

int
plat_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	if (opl_get_mem_sid == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_sid(unum, buf, buflen, lenp));
}

int
plat_get_mem_offset(uint64_t paddr, uint64_t *offp)
{
	if (opl_get_mem_offset == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_offset(paddr, offp));
}

int
plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	if (opl_get_mem_addr == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_addr(unum, sid, offset, addrp));
}

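/*
 * Summary of the backoff tuning below, derived from the OPL_BOFF_*
 * constants defined above: the backoff counter grows by 1 per failed
 * attempt while it is under OPL_BOFF_CAP1 (20), by OPL_BOFF_SLEEP (5)
 * while under OPL_BOFF_CAP2 (60), and by 2 * OPL_BOFF_SLEEP (10) after
 * that, capped at OPL_BOFF_MAX (200).  At the cap the delay is roughly
 * 200 / 5 = 40 sleep instructions, i.e. on the order of
 * 40 * OPL_BOFF_TM ns (about 60 us).
 */
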
void
plat_lock_delay(int *backoff)
{
	int i;
	int cnt;
	int flag;
	int ctr;
	hrtime_t delay_start;
	/*
	 * Platform specific lock delay code for OPL
	 *
	 * Using staged linear increases in the delay.
	 * The sleep instruction is the preferred method of delay,
	 * but is too large a granularity for the initial backoff.
	 */

	if (*backoff == 0) *backoff = OPL_BOFF_BASE;

	flag = !*backoff;

	if (*backoff < OPL_BOFF_CAP1) {
		/*
		 * If the desired backoff is long enough,
		 * use sleep for most of it
		 */
		for (cnt = *backoff; cnt >= OPL_BOFF_SLEEP;
		    cnt -= OPL_BOFF_SLEEP) {
			cpu_smt_pause();
		}
		/*
		 * spin for small remainder of backoff
		 *
		 * fake call to nulldev included to prevent
		 * compiler from optimizing out the spin loop
		 */
		for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
			if (flag) (void) nulldev();
		}
	} else {
		/* backoff is very large.  Fill it by sleeping */
		delay_start = gethrtime();
		cnt = *backoff/OPL_BOFF_SLEEP;
		/*
		 * use sleep instructions for delay
		 */
		for (i = 0; i < cnt; i++) {
			cpu_smt_pause();
		}

		/*
		 * Note: if the other strand executes a sleep instruction,
		 * then the sleep ends immediately with a minimum time of
		 * 42 clocks.  We check gethrtime to ensure we have
		 * waited long enough.  And we include both a short
		 * spin loop and a sleep for any final delay time.
		 */

		while ((gethrtime() - delay_start) < cnt * OPL_BOFF_TM) {
			cpu_smt_pause();
			for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
				if (flag) (void) nulldev();
			}
		}
	}

	/*
	 * We adjust the backoff in three linear stages.
	 * The initial stage has small increases as this phase
	 * usually handles locks with light contention.  We don't want
	 * to have a long backoff on a lock that is available.
	 *
	 * In the second stage, we are in transition, unsure whether
	 * the lock is under heavy contention.  As the failures to
	 * obtain the lock increase, we back off further.
	 *
	 * For the final stage, we are in a heavily contended or
	 * long-held lock, so we want to reduce the number of tries.
	 */
	if (*backoff < OPL_BOFF_CAP1) {
		*backoff += 1;
	} else {
		if (*backoff < OPL_BOFF_CAP2) {
			*backoff += OPL_BOFF_SLEEP;
		} else {
			*backoff += 2 * OPL_BOFF_SLEEP;
		}
		if (*backoff > OPL_BOFF_MAX) {
			*backoff = OPL_BOFF_MAX;
		}
	}
}

/*
 * The following code implements an asynchronous call to XSCF to set up
 * the domain node name.
 */

#define	FREE_MSG(m)		kmem_free((m), NM_LEN((m)->len))

/*
 * The following three macros define all the operations on the request
 * list we are using here, and hide the details of the list
 * implementation from the code.
 */
#define	PUSH(m)		\
	{		\
		(m)->next = ctl_msg.head;	\
		(m)->prev = NULL;		\
		if ((m)->next != NULL)		\
			(m)->next->prev = (m);	\
		ctl_msg.head = (m);		\
	}

#define	REMOVE(m)	\
	{		\
		if ((m)->prev != NULL)			\
			(m)->prev->next = (m)->next;	\
		else					\
			ctl_msg.head = (m)->next;	\
		if ((m)->next != NULL)			\
			(m)->next->prev = (m)->prev;	\
	}

#define	FREE_THE_TAIL(head)	\
	{			\
		nm_msg_t *n_msg, *m;		\
		m = (head)->next;		\
		(head)->next = NULL;		\
		while (m != NULL) {		\
			n_msg = m->next;	\
			FREE_MSG(m);		\
			m = n_msg;		\
		}				\
	}

#define	SCF_PUTINFO(f, s, p)	\
	f(KEY_ESCF, 0x01, 0, s, p)

#define	PASS2XSCF(m, r)	((r = SCF_PUTINFO(ctl_msg.scf_service_function, \
	(m)->len, (m)->data)) == 0)

/*
 * The value of the following macro loosely depends on the
 * value of the "device busy" timeout used in the SCF driver.
 * (See pass2xscf_thread()).
 */
#define	SCF_DEVBUSY_DELAY	10

/*
 * The default number of attempts to contact the scf driver
 * if we cannot fetch any information about the timeout value
 * it uses.
 */

#define	REPEATS	4

typedef struct nm_msg {
	struct nm_msg	*next;
	struct nm_msg	*prev;
	int		len;
	char		data[1];
} nm_msg_t;

#define	NM_LEN(len)	(sizeof (nm_msg_t) + (len) - 1)

static struct ctlmsg {
	nm_msg_t	*head;
	nm_msg_t	*now_serving;
	kmutex_t	nm_lock;
	kthread_t	*nmt;
	int		cnt;
	int (*scf_service_function)(uint32_t, uint8_t,
	    uint32_t, uint32_t, void *);
} ctl_msg;

static void
post_xscf_msg(char *dp, int len)
{
	nm_msg_t *msg;

	msg = (nm_msg_t *)kmem_zalloc(NM_LEN(len), KM_SLEEP);

	bcopy(dp, msg->data, len);
	msg->len = len;

	mutex_enter(&ctl_msg.nm_lock);
	if (ctl_msg.nmt == NULL) {
		ctl_msg.nmt = thread_create(NULL, 0, pass2xscf_thread,
		    NULL, 0, &p0, TS_RUN, minclsyspri);
	}

	PUSH(msg);
	ctl_msg.cnt++;
	mutex_exit(&ctl_msg.nm_lock);
}

static void
pass2xscf_thread()
{
	nm_msg_t *msg;
	int ret;
	uint_t i, msg_sent, xscf_driver_delay;
	static uint_t repeat_cnt;
	uint_t *scf_wait_cnt;

	mutex_enter(&ctl_msg.nm_lock);

	/*
	 * Find the address of the SCF put routine if it's not done yet.
	 */
	if (ctl_msg.scf_service_function == NULL) {
		if ((ctl_msg.scf_service_function =
		    (int (*)(uint32_t, uint8_t, uint32_t, uint32_t, void *))
		    modgetsymvalue("scf_service_putinfo", 0)) == NULL) {
			cmn_err(CE_NOTE, "pass2xscf_thread: "
			    "scf_service_putinfo not found\n");
			ctl_msg.nmt = NULL;
			mutex_exit(&ctl_msg.nm_lock);
			return;
		}
	}

	/*
	 * Calculate the number of attempts to connect to XSCF based on the
	 * scf driver delay (which is
	 * SCF_DEVBUSY_DELAY*scf_online_wait_rcnt seconds) and the value
	 * of xscf_connect_delay (the total number of seconds to wait
	 * until XSCF gets ready.)
	 */
	if (repeat_cnt == 0) {
		if ((scf_wait_cnt =
		    (uint_t *)
		    modgetsymvalue("scf_online_wait_rcnt", 0)) == NULL) {
			repeat_cnt = REPEATS;
		} else {

			xscf_driver_delay = *scf_wait_cnt *
			    SCF_DEVBUSY_DELAY;
			repeat_cnt = (xscf_connect_delay/xscf_driver_delay) + 1;
		}
	}

	while (ctl_msg.cnt != 0) {

		/*
		 * Take the very last request from the queue,
		 */
		ctl_msg.now_serving = ctl_msg.head;
		ASSERT(ctl_msg.now_serving != NULL);

		/*
		 * and discard all the others if any.
		 */
		FREE_THE_TAIL(ctl_msg.now_serving);
		ctl_msg.cnt = 1;
		mutex_exit(&ctl_msg.nm_lock);

		/*
		 * Pass the name to XSCF.  Please note that we do not hold
		 * the mutex while we are doing this.
		 */
		msg_sent = 0;
		for (i = 0; i < repeat_cnt; i++) {
			if (PASS2XSCF(ctl_msg.now_serving, ret)) {
				msg_sent = 1;
				break;
			} else {
				if (ret != EBUSY) {
					cmn_err(CE_NOTE, "pass2xscf_thread:"
					    " unexpected return code"
					    " from scf_service_putinfo():"
					    " %d\n", ret);
				}
			}
		}

		if (msg_sent) {

			/*
			 * Remove the request from the list
			 */
			mutex_enter(&ctl_msg.nm_lock);
			msg = ctl_msg.now_serving;
			ctl_msg.now_serving = NULL;
			REMOVE(msg);
			ctl_msg.cnt--;
			mutex_exit(&ctl_msg.nm_lock);
			FREE_MSG(msg);
		} else {

			/*
			 * If any other requests arrived while we were
			 * trying to communicate with XSCF, we will drop
			 * this one and take the latest one.  Otherwise
			 * we will try to pass this one again.
			 */
			cmn_err(CE_NOTE,
			    "pass2xscf_thread: "
			    "scf_service_putinfo "
			    "not responding\n");
		}
		mutex_enter(&ctl_msg.nm_lock);
	}

	/*
	 * The request queue is empty, exit.
	 */
	ctl_msg.nmt = NULL;
	mutex_exit(&ctl_msg.nm_lock);
}