/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/promif.h>
#include <sys/platform_module.h>
#include <sys/cmn_err.h>
#include <sys/errno.h>
#include <sys/machsystm.h>
#include <sys/bootconf.h>
#include <sys/nvpair.h>
#include <sys/kobj.h>
#include <sys/mem_cage.h>
#include <sys/opl.h>
#include <sys/scfd/scfostoescf.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/utsname.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/lgrp.h>
#include <sys/memnode.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
#include <sys/cpu.h>
#include <vm/vm_dep.h>

int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
int (*opl_get_mem_sid)(char *unum, char *buf, int buflen, int *lenp);
int (*opl_get_mem_offset)(uint64_t paddr, uint64_t *offp);
int (*opl_get_mem_addr)(char *unum, char *sid,
    uint64_t offset, uint64_t *paddr);

/* Memory for fcode claims.  16K times the maximum possible number of IO units */
#define	EFCODE_SIZE	(OPL_MAX_BOARDS * OPL_MAX_IO_UNITS_PER_BOARD * 0x4000)
int efcode_size = EFCODE_SIZE;

#define	OPL_MC_MEMBOARD_SHIFT 38	/* Boards on 256GB boundary */

/* Set the maximum number of boards for DR */
int opl_boards = OPL_MAX_BOARDS;

void sgn_update_all_cpus(ushort_t, uchar_t, uchar_t);

extern int tsb_lgrp_affinity;

int opl_tsb_spares = (OPL_MAX_BOARDS) * (OPL_MAX_PCICH_UNITS_PER_BOARD) *
	(OPL_MAX_TSBS_PER_PCICH);

pgcnt_t opl_startup_cage_size = 0;

/*
 * The length of the delay, in seconds, in communication with the XSCF
 * after which a warning message is logged.
 */
uint_t	xscf_connect_delay = 60 * 15;

static	opl_model_info_t opl_models[] = {
	{ "FF1", OPL_MAX_BOARDS_FF1, FF1, STD_DISPATCH_TABLE },
	{ "FF2", OPL_MAX_BOARDS_FF2, FF2, STD_DISPATCH_TABLE },
	{ "DC1", OPL_MAX_BOARDS_DC1, DC1, STD_DISPATCH_TABLE },
	{ "DC2", OPL_MAX_BOARDS_DC2, DC2, EXT_DISPATCH_TABLE },
	{ "DC3", OPL_MAX_BOARDS_DC3, DC3, EXT_DISPATCH_TABLE },
};
static	int	opl_num_models = sizeof (opl_models)/sizeof (opl_model_info_t);

/*
 * opl_cur_model: the model this kernel is currently running on,
 * set by set_model_info().
 */
static	opl_model_info_t *opl_cur_model = NULL;

static struct memlist *opl_memlist_per_board(struct memlist *ml);
static void post_xscf_msg(char *, int);
static void pass2xscf_thread();

/*
 * Note: the FF/DC out-of-order instruction engine takes only a
 * single cycle to execute each spin-loop iteration; for comparison,
 * Panther takes 6 cycles for the same loop.
 * The OPL sleep instruction takes approximately 1500 nsec.
 * If spin count = OPL_BOFF_SLEEP * OPL_BOFF_SPIN, then the
 * spin time should be equal to OPL_BOFF_TM nsecs.
 * The listed values are tuned for 2.15GHz to 2.4GHz systems;
 * values may change for future systems.
 */
#define	OPL_BOFF_SPIN 720
#define	OPL_BOFF_BASE 1
#define	OPL_BOFF_SLEEP 5
#define	OPL_BOFF_CAP1 20
#define	OPL_BOFF_CAP2 60
#define	OPL_BOFF_MAX (40 * OPL_BOFF_SLEEP)
#define	OPL_BOFF_TM 1500

int
set_platform_max_ncpus(void)
{
	return (OPL_MAX_CPU_PER_BOARD * OPL_MAX_BOARDS);
}

int
set_platform_tsb_spares(void)
{
	return (MIN(opl_tsb_spares, MAX_UPA));
}

static void
set_model_info()
{
	extern int ts_dispatch_extended;
	char	name[MAXSYSNAME];
	int	i;

	/*
	 * Get model name from the root node.
	 *
	 * We are using the prom device tree since, at this point,
	 * the Solaris device tree is not yet set up.
	 */
	(void) prom_getprop(prom_rootnode(), "model", (caddr_t)name);

	for (i = 0; i < opl_num_models; i++) {
		if (strncmp(name, opl_models[i].model_name, MAXSYSNAME) == 0) {
			opl_cur_model = &opl_models[i];
			break;
		}
	}

	/*
	 * If no model matched, it is an unknown model;
	 * just return.
	 */
	if (i == opl_num_models)
		return;

	if ((opl_cur_model->model_cmds & EXT_DISPATCH_TABLE) &&
	    (ts_dispatch_extended == -1)) {
		/*
		 * Based on the platform model, select a dispatch table.
		 * Only DC2 and DC3 systems use the alternate/extended
		 * TS dispatch table.
		 * FF1, FF2 and DC1 systems use the standard dispatch table.
		 */
		ts_dispatch_extended = 1;
	}

}

static void
set_max_mmu_ctxdoms()
{
	extern uint_t	max_mmu_ctxdoms;
	int		max_boards;

	/*
	 * From the model, get the maximum number of boards
	 * supported and set the value accordingly.  If the model
	 * could not be determined or recognized, we assume the max value.
	 */
	if (opl_cur_model == NULL)
		max_boards = OPL_MAX_BOARDS;
	else
		max_boards = opl_cur_model->model_max_boards;

	/*
	 * On OPL, cores and MMUs are one-to-one.
	 */
	max_mmu_ctxdoms = OPL_MAX_CORE_UNITS_PER_BOARD * max_boards;
}

#pragma weak mmu_init_large_pages

void
set_platform_defaults(void)
{
	extern char *tod_module_name;
	extern void cpu_sgn_update(ushort_t, uchar_t, uchar_t, int);
	extern void mmu_init_large_pages(size_t);

	/* Set the CPU signature function pointer */
	cpu_sgn_func = cpu_sgn_update;

	/* Set appropriate tod module for OPL platform */
	ASSERT(tod_module_name == NULL);
	tod_module_name = "todopl";

	if ((mmu_page_sizes == max_mmu_page_sizes) &&
	    (mmu_ism_pagesize != DEFAULT_ISM_PAGESIZE)) {
		if (&mmu_init_large_pages)
			mmu_init_large_pages(mmu_ism_pagesize);
	}

	tsb_lgrp_affinity = 1;

	set_max_mmu_ctxdoms();
}

/*
 * Convert a logical board number to a physical one.
 */

#define	LSBPROP		"board#"
#define	PSBPROP		"physical-board#"

int
opl_get_physical_board(int id)
{
	dev_info_t	*root_dip, *dip = NULL;
	char		*dname = NULL;
	int		circ;

	pnode_t		pnode;
	char		pname[MAXSYSNAME] = {0};

	int		lsb_id;	/* Logical System Board ID */
	int		psb_id;	/* Physical System Board ID */


	/*
	 * This function is called at an early stage of boot-up, when the
	 * kernel device tree is not yet initialized, and also
	 * later on when the device tree is up.  We want to try
	 * the fast track first.
	 */
	root_dip = ddi_root_node();
	if (root_dip) {
		/* Get from devinfo node */
		ndi_devi_enter(root_dip, &circ);
		for (dip = ddi_get_child(root_dip); dip;
		    dip = ddi_get_next_sibling(dip)) {

			dname = ddi_node_name(dip);
			if (strncmp(dname, "pseudo-mc", 9) != 0)
				continue;

			if ((lsb_id = (int)ddi_getprop(DDI_DEV_T_ANY, dip,
			    DDI_PROP_DONTPASS, LSBPROP, -1)) == -1)
				continue;

			if (id == lsb_id) {
				if ((psb_id = (int)ddi_getprop(DDI_DEV_T_ANY,
				    dip, DDI_PROP_DONTPASS, PSBPROP, -1))
				    == -1) {
					ndi_devi_exit(root_dip, circ);
					return (-1);
				} else {
					ndi_devi_exit(root_dip, circ);
					return (psb_id);
				}
			}
		}
		ndi_devi_exit(root_dip, circ);
	}

	/*
	 * We either do not have the kernel device tree yet, or we did not
	 * find the node for some reason (say the kernel device tree was
	 * modified); try the OBP tree.
	 */
	pnode = prom_rootnode();
	for (pnode = prom_childnode(pnode); pnode;
	    pnode = prom_nextnode(pnode)) {

		if ((prom_getprop(pnode, "name", (caddr_t)pname) == -1) ||
		    (strncmp(pname, "pseudo-mc", 9) != 0))
			continue;

		if (prom_getprop(pnode, LSBPROP, (caddr_t)&lsb_id) == -1)
			continue;

		if (id == lsb_id) {
			if (prom_getprop(pnode, PSBPROP,
			    (caddr_t)&psb_id) == -1) {
				return (-1);
			} else {
				return (psb_id);
			}
		}
	}

	return (-1);
}

/*
 * For OPL it's possible that memory from two or more successive boards
 * will be contiguous across the boards, and therefore represented as a
 * single chunk.
 * This function splits such chunks down the board boundaries.
 */
static struct memlist *
opl_memlist_per_board(struct memlist *ml)
{
	uint64_t ssize, low, high, boundary;
	struct memlist *head, *tail, *new;

	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);

	head = tail = NULL;

	for (; ml; ml = ml->next) {
		low  = (uint64_t)ml->address;
		high = low+(uint64_t)(ml->size);
		while (low < high) {
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			new = kmem_zalloc(sizeof (struct memlist), KM_SLEEP);
			new->address = low;
			new->size = boundary - low;
			if (head == NULL)
				head = new;
			if (tail) {
				tail->next = new;
				new->prev = tail;
			}
			tail = new;
			low = boundary;
		}
	}
	return (head);
}

void
set_platform_cage_params(void)
{
	extern pgcnt_t total_pages;
	extern struct memlist *phys_avail;
	struct memlist *ml, *tml;

	if (kernel_cage_enable) {
		pgcnt_t preferred_cage_size;

		preferred_cage_size = MAX(opl_startup_cage_size,
		    total_pages / 256);

		ml = opl_memlist_per_board(phys_avail);

		/*
		 * Note: we are assuming that POST has loaded the
		 * whole show into the high end of memory.  Having
		 * taken this leap, we copy the whole of phys_avail
		 * to the glist and arrange for the cage to grow
		 * downward (descending pfns).
		 */
		kcage_range_init(ml, KCAGE_DOWN, preferred_cage_size);

		/* free the memlist */
		do {
			tml = ml->next;
			kmem_free(ml, sizeof (struct memlist));
			ml = tml;
		} while (ml != NULL);
	}

	if (kcage_on)
		cmn_err(CE_NOTE, "!DR Kernel Cage is ENABLED");
	else
		cmn_err(CE_NOTE, "!DR Kernel Cage is DISABLED");
}

/*ARGSUSED*/
int
plat_cpu_poweron(struct cpu *cp)
{
	int (*opl_cpu_poweron)(struct cpu *) = NULL;

	opl_cpu_poweron =
	    (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweron", 0);

	if (opl_cpu_poweron == NULL)
		return (ENOTSUP);
	else
		return ((opl_cpu_poweron)(cp));

}

/*ARGSUSED*/
int
plat_cpu_poweroff(struct cpu *cp)
{
	int (*opl_cpu_poweroff)(struct cpu *) = NULL;

	opl_cpu_poweroff =
	    (int (*)(struct cpu *))kobj_getsymvalue("drmach_cpu_poweroff", 0);

	if (opl_cpu_poweroff == NULL)
		return (ENOTSUP);
	else
		return ((opl_cpu_poweroff)(cp));

}

int
plat_max_boards(void)
{
	return (OPL_MAX_BOARDS);
}

int
plat_max_cpu_units_per_board(void)
{
	return (OPL_MAX_CPU_PER_BOARD);
}

int
plat_max_mem_units_per_board(void)
{
	return (OPL_MAX_MEM_UNITS_PER_BOARD);
}

int
plat_max_io_units_per_board(void)
{
	return (OPL_MAX_IO_UNITS_PER_BOARD);
}

int
plat_max_cmp_units_per_board(void)
{
	return (OPL_MAX_CMP_UNITS_PER_BOARD);
}

int
plat_max_core_units_per_board(void)
{
	return (OPL_MAX_CORE_UNITS_PER_BOARD);
}

int
plat_pfn_to_mem_node(pfn_t pfn)
{
	return (pfn >> mem_node_pfn_shift);
}

/* ARGSUSED */
void
plat_build_mem_nodes(prom_memlist_t *list, size_t nelems)
{
	size_t	elem;
	pfn_t	basepfn;
	pgcnt_t	npgs;
	uint64_t	boundary, ssize;
	uint64_t	low, high;

	/*
	 * OPL mem slices are always aligned on a 256GB boundary.
	 */
	mem_node_pfn_shift = OPL_MC_MEMBOARD_SHIFT - MMU_PAGESHIFT;
	mem_node_physalign = 0;

	/*
	 * Boot install lists are arranged <addr, len>, <addr, len>, ...
	 */
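	/*
	 * For reference: 1ull << OPL_MC_MEMBOARD_SHIFT (38) is 256GB,
	 * the per-board memory stride mentioned above.
	 */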
	ssize = (1ull << OPL_MC_MEMBOARD_SHIFT);
	for (elem = 0; elem < nelems; list++, elem++) {
		low  = list->addr;
		high = low + list->size;
		while (low < high) {
			boundary = roundup(low+1, ssize);
			boundary = MIN(high, boundary);
			basepfn = btop(low);
			npgs = btop(boundary - low);
			mem_node_add_slice(basepfn, basepfn + npgs - 1);
			low = boundary;
		}
	}
}

/*
 * Find the CPU associated with a slice at boot-time.
 */
void
plat_fill_mc(pnode_t nodeid)
{
	int board;
	int memnode;
	struct {
		uint64_t	addr;
		uint64_t	size;
	} mem_range;

	if (prom_getprop(nodeid, "board#", (caddr_t)&board) < 0) {
		panic("Can not find board# property in mc node %x", nodeid);
	}
	if (prom_getprop(nodeid, "sb-mem-ranges", (caddr_t)&mem_range) < 0) {
		panic("Can not find sb-mem-ranges property in mc node %x",
		    nodeid);
	}
	memnode = mem_range.addr >> OPL_MC_MEMBOARD_SHIFT;
	plat_assign_lgrphand_to_mem_node(board, memnode);
}

/*
 * Return the platform handle for the lgroup containing the given CPU
 *
 * For OPL, lgroup platform handle == board #.
 */

extern int mpo_disabled;
extern lgrp_handle_t lgrp_default_handle;

lgrp_handle_t
plat_lgrp_cpu_to_hand(processorid_t id)
{
	lgrp_handle_t plathand;

	/*
	 * Return the real platform handle for the CPU until
	 * such time as we know that MPO should be disabled.
	 * At that point, we set the "mpo_disabled" flag to true,
	 * and from that point on, return the default handle.
	 *
	 * By the time we know that MPO should be disabled, the
	 * first CPU will have already been added to a leaf
	 * lgroup, but that's ok.  The common lgroup code will
	 * double check that the boot CPU is in the correct place,
	 * and in the case where mpo should be disabled, will move
	 * it to the root if necessary.
	 */
	if (mpo_disabled) {
		/* If MPO is disabled, return the default (UMA) handle */
		plathand = lgrp_default_handle;
	} else
		plathand = (lgrp_handle_t)LSB_ID(id);
	return (plathand);
}

/*
 * Platform specific lgroup initialization
 */
void
plat_lgrp_init(void)
{
	extern uint32_t lgrp_expand_proc_thresh;
	extern uint32_t lgrp_expand_proc_diff;

	/*
	 * Set tuneables for the OPL architecture
	 *
	 * lgrp_expand_proc_thresh is the minimum load on the lgroups
	 * this process is currently running on before considering
	 * expanding threads to another lgroup.
	 *
	 * lgrp_expand_proc_diff determines how much less the remote lgroup
	 * must be loaded before expanding to it.
	 *
	 * Since remote latencies can be costly, attempt to keep 3 threads
	 * within the same lgroup before expanding to the next lgroup.
	 */
	lgrp_expand_proc_thresh = LGRP_LOADAVG_THREAD_MAX * 3;
	lgrp_expand_proc_diff = LGRP_LOADAVG_THREAD_MAX;
}

/*
 * Platform notification of lgroup (re)configuration changes
 */
/*ARGSUSED*/
void
plat_lgrp_config(lgrp_config_flag_t evt, uintptr_t arg)
{
	update_membounds_t *umb;
	lgrp_config_mem_rename_t lmr;
	int sbd, tbd;
	lgrp_handle_t hand, shand, thand;
	int mnode, snode, tnode;
	pfn_t start, end;

	if (mpo_disabled)
		return;

	switch (evt) {

	case LGRP_CONFIG_MEM_ADD:
		/*
		 * Establish the lgroup handle to memnode translation.
		 */
		umb = (update_membounds_t *)arg;

		hand = umb->u_board;
		mnode = plat_pfn_to_mem_node(umb->u_base >> MMU_PAGESHIFT);
		plat_assign_lgrphand_to_mem_node(hand, mnode);

		break;

	case LGRP_CONFIG_MEM_DEL:
		/*
		 * Special handling for possible memory holes.
		 */
		umb = (update_membounds_t *)arg;
		hand = umb->u_board;
		if ((mnode = plat_lgrphand_to_mem_node(hand)) != -1) {
			if (mem_node_config[mnode].exists) {
				start = mem_node_config[mnode].physbase;
				end = mem_node_config[mnode].physmax;
				mem_node_pre_del_slice(start, end);
				mem_node_post_del_slice(start, end, 0);
			}
		}

		break;

	case LGRP_CONFIG_MEM_RENAME:
		/*
		 * During a DR copy-rename operation, all of the memory
		 * on one board is moved to another board -- but the
		 * addresses/pfns and memnodes don't change.  This means
		 * the memory has changed locations without changing identity.
		 *
		 * Source is where we are copying from and target is where we
		 * are copying to.  After source memnode is copied to target
		 * memnode, the physical addresses of the target memnode are
		 * renamed to match what the source memnode had.  Then target
		 * memnode can be removed and source memnode can take its
		 * place.
		 *
		 * To do this, swap the lgroup handle to memnode mappings for
		 * the boards, so target lgroup will have source memnode and
		 * source lgroup will have empty target memnode which is where
		 * its memory will go (if any is added to it later).
		 *
		 * Then source memnode needs to be removed from its lgroup
		 * and added to the target lgroup where the memory was living
		 * but under a different name/memnode.  The memory was in the
		 * target memnode and now lives in the source memnode with
		 * different physical addresses even though it is the same
		 * memory.
		 */
		sbd = arg & 0xffff;
		tbd = (arg & 0xffff0000) >> 16;
		shand = sbd;
		thand = tbd;
		snode = plat_lgrphand_to_mem_node(shand);
		tnode = plat_lgrphand_to_mem_node(thand);

		/*
		 * Special handling for possible memory holes.
		 */
		if (tnode != -1 && mem_node_config[tnode].exists) {
			start = mem_node_config[tnode].physbase;
			end = mem_node_config[tnode].physmax;
			mem_node_pre_del_slice(start, end);
			mem_node_post_del_slice(start, end, 0);
		}

		plat_assign_lgrphand_to_mem_node(thand, snode);
		plat_assign_lgrphand_to_mem_node(shand, tnode);

		lmr.lmem_rename_from = shand;
		lmr.lmem_rename_to = thand;

		/*
		 * Remove source memnode of copy rename from its lgroup
		 * and add it to its new target lgroup
		 */
		lgrp_config(LGRP_CONFIG_MEM_RENAME, (uintptr_t)snode,
		    (uintptr_t)&lmr);

		break;

	default:
		break;
	}
}

/*
 * Return latency between "from" and "to" lgroups
 *
 * This latency number can only be used for relative comparison
 * between lgroups on the running system, cannot be used across platforms,
 * and may not reflect the actual latency.  It is platform and implementation
 * specific, so the platform gets to decide its value.  It would be nice if the
 * number was at least proportional to make comparisons more meaningful though.
 * NOTE: The numbers below are supposed to be load latencies for uncached
 * memory divided by 10.
 *
 */
int
plat_lgrp_latency(lgrp_handle_t from, lgrp_handle_t to)
{
	/*
	 * Return the min remote latency when there are more than two lgroups
	 * (root and child) and we are getting the latency between two
	 * different lgroups, or the root is involved.
	 */
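	/*
	 * For reference: given the divide-by-10 note above, the constants
	 * below (42 remote vs. 35 local) imply a remote-to-local latency
	 * ratio of roughly 1.2:1; the absolute values are meaningful only
	 * for relative comparison.
	 */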
	if (lgrp_optimizations() && (from != to ||
	    from == LGRP_DEFAULT_HANDLE || to == LGRP_DEFAULT_HANDLE))
		return (42);
	else
		return (35);
}

/*
 * Return platform handle for root lgroup
 */
lgrp_handle_t
plat_lgrp_root_hand(void)
{
	if (mpo_disabled)
		return (lgrp_default_handle);

	return (LGRP_DEFAULT_HANDLE);
}

/*ARGSUSED*/
void
plat_freelist_process(int mnode)
{
}

void
load_platform_drivers(void)
{
	(void) i_ddi_attach_pseudo_node("dr");
}

/*
 * No platform drivers on this platform
 */
char *platform_module_list[] = {
	(char *)0
};

/*ARGSUSED*/
void
plat_tod_fault(enum tod_fault_type tod_bad)
{
}

/*ARGSUSED*/
void
cpu_sgn_update(ushort_t sgn, uchar_t state, uchar_t sub_state, int cpuid)
{
	static void (*scf_panic_callback)(int);
	static void (*scf_shutdown_callback)(int);

	/*
	 * This is for notifying the SCF of a system panic/shutdown.
	 * In case of shutdown or panic, the SCF callback
	 * function should be called.
	 * <SCF callback functions>
	 *   scf_panic_callb()   : panicsys()->panic_quiesce_hw()
	 *   scf_shutdown_callb(): halt() or power_down() or reboot_machine()
	 * cpuid should be -1 and state should be SIGST_EXIT.
	 */
	if (state == SIGST_EXIT && cpuid == -1) {

		/*
		 * Find the symbols for the SCF panic and shutdown
		 * callback routines in the driver.
		 */
		if (scf_panic_callback == NULL)
			scf_panic_callback = (void (*)(int))
			    modgetsymvalue("scf_panic_callb", 0);
		if (scf_shutdown_callback == NULL)
			scf_shutdown_callback = (void (*)(int))
			    modgetsymvalue("scf_shutdown_callb", 0);

		switch (sub_state) {
		case SIGSUBST_PANIC:
			if (scf_panic_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_panic_callb not found\n");
				return;
			}
			scf_panic_callback(SIGSUBST_PANIC);
			break;

		case SIGSUBST_HALT:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_HALT);
			break;

		case SIGSUBST_ENVIRON:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_ENVIRON);
			break;

		case SIGSUBST_REBOOT:
			if (scf_shutdown_callback == NULL) {
				cmn_err(CE_NOTE, "!cpu_sgn_update: "
				    "scf_shutdown_callb not found\n");
				return;
			}
			scf_shutdown_callback(SIGSUBST_REBOOT);
			break;
		}
	}
}

/*ARGSUSED*/
int
plat_get_mem_unum(int synd_code, uint64_t flt_addr, int flt_bus_id,
    int flt_in_memory, ushort_t flt_status,
    char *buf, int buflen, int *lenp)
{
	/*
	 * Check if it's a memory error.
	 */
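	/*
	 * opl_get_mem_unum is a hook that is expected to be filled in
	 * elsewhere (presumably by the OPL memory-controller support code);
	 * until it has been set, ENOTSUP is returned.
	 */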
	if (flt_in_memory) {
		if (opl_get_mem_unum != NULL) {
			return (opl_get_mem_unum(synd_code, flt_addr, buf,
			    buflen, lenp));
		} else {
			return (ENOTSUP);
		}
	} else {
		return (ENOTSUP);
	}
}

/*ARGSUSED*/
int
plat_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	int	ret = 0;
	int	sb;
	int	plen;

	sb = opl_get_physical_board(LSB_ID(cpuid));
	if (sb == -1) {
		return (ENXIO);
	}

	/*
	 * opl_cur_model is assigned here
	 */
	if (opl_cur_model == NULL) {
		set_model_info();

		/*
		 * if not matched, return
		 */
		if (opl_cur_model == NULL)
			return (ENODEV);
	}

	ASSERT((opl_cur_model - opl_models) == (opl_cur_model->model_type));

	switch (opl_cur_model->model_type) {
	case FF1:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_A",
		    CHIP_ID(cpuid) / 2);
		break;

	case FF2:
		plen = snprintf(buf, buflen, "/%s/CPUM%d", "MBU_B",
		    (CHIP_ID(cpuid) / 2) + (sb * 2));
		break;

	case DC1:
	case DC2:
	case DC3:
		plen = snprintf(buf, buflen, "/%s%02d/CPUM%d", "CMU", sb,
		    CHIP_ID(cpuid));
		break;

	default:
		/* This should never happen */
		return (ENODEV);
	}

	if (plen >= buflen) {
		ret = ENOSPC;
	} else {
		if (lenp)
			*lenp = strlen(buf);
	}
	return (ret);
}

void
plat_nodename_set(void)
{
	post_xscf_msg((char *)&utsname, sizeof (struct utsname));
}

caddr_t efcode_vaddr = NULL;

/*
 * Preallocate enough memory for fcode claims.
 */

caddr_t
efcode_alloc(caddr_t alloc_base)
{
	caddr_t efcode_alloc_base = (caddr_t)roundup((uintptr_t)alloc_base,
	    MMU_PAGESIZE);
	caddr_t vaddr;

	/*
	 * allocate the physical memory for the Oberon fcode.
	 */
	if ((vaddr = (caddr_t)BOP_ALLOC(bootops, efcode_alloc_base,
	    efcode_size, MMU_PAGESIZE)) == NULL)
		cmn_err(CE_PANIC, "Cannot allocate Efcode Memory");

	efcode_vaddr = vaddr;

	return (efcode_alloc_base + efcode_size);
}

caddr_t
plat_startup_memlist(caddr_t alloc_base)
{
	caddr_t tmp_alloc_base;

	tmp_alloc_base = efcode_alloc(alloc_base);
	tmp_alloc_base =
	    (caddr_t)roundup((uintptr_t)tmp_alloc_base, ecache_alignsize);
	return (tmp_alloc_base);
}

void
startup_platform(void)
{
}

void
plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
{
	int	impl;

	impl = cpunodes[cpuid].implementation;
	if (IS_OLYMPUS_C(impl) || IS_JUPITER(impl)) {
		info->mmu_idx = MMU_ID(cpuid);
		info->mmu_nctxs = 8192;
	} else {
		cmn_err(CE_PANIC, "Unknown processor %d", impl);
	}
}

int
plat_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
{
	if (opl_get_mem_sid == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_sid(unum, buf, buflen, lenp));
}

int
plat_get_mem_offset(uint64_t paddr, uint64_t *offp)
{
	if (opl_get_mem_offset == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_offset(paddr, offp));
}

int
plat_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
{
	if (opl_get_mem_addr == NULL) {
		return (ENOTSUP);
	}
	return (opl_get_mem_addr(unum, sid, offset, addrp));
}

void
plat_lock_delay(int *backoff)
{
	int i;
	int cnt;
	int flag;
	int ctr;
	hrtime_t delay_start;
	/*
	 * Platform specific lock delay code for OPL
	 *
	 * Using staged linear increases in the delay.
	 * The sleep instruction is the preferred method of delay,
	 * but is too large a granularity for the initial backoff.
	 */

	if (*backoff == 0)
		*backoff = OPL_BOFF_BASE;

	flag = !*backoff;

	if (*backoff < OPL_BOFF_CAP1) {
		/*
		 * If the desired backoff is long enough,
		 * use sleep for most of it.
		 */
		for (cnt = *backoff; cnt >= OPL_BOFF_SLEEP;
		    cnt -= OPL_BOFF_SLEEP) {
			cpu_smt_pause();
		}
		/*
		 * Spin for the small remainder of the backoff.
		 *
		 * A fake call to nulldev is included to prevent the
		 * compiler from optimizing out the spin loop.
		 */
		for (ctr = cnt * OPL_BOFF_SPIN; ctr; ctr--) {
			if (flag)
				(void) nulldev();
		}
	} else {
		/* The backoff is very large.  Fill it by sleeping. */
		delay_start = gethrtime();
		cnt = *backoff/OPL_BOFF_SLEEP;
		/*
		 * use sleep instructions for delay
		 */
		for (i = 0; i < cnt; i++) {
			cpu_smt_pause();
		}

		/*
		 * Note: if the other strand executes a sleep instruction,
		 * then the sleep ends immediately with a minimum time of
		 * 42 clocks.  We check gethrtime to ensure we have
		 * waited long enough.  And we include both a short
		 * spin loop and a sleep for any final delay time.
		 */

		while ((gethrtime() - delay_start) < cnt * OPL_BOFF_TM) {
			cpu_smt_pause();
			for (ctr = OPL_BOFF_SPIN; ctr; ctr--) {
				if (flag)
					(void) nulldev();
			}
		}
	}

	/*
	 * We adjust the backoff in three linear stages.
	 * The initial stage has small increases as this phase usually
	 * handles locks with light contention.  We don't want
	 * to have a long backoff on a lock that is available.
	 *
	 * In the second stage, we are in transition, unsure whether
	 * the lock is under heavy contention.  As the failures to
	 * obtain the lock increase, we back off further.
	 *
	 * For the final stage, we are in a heavily contended or
	 * long held lock, so we want to reduce the number of tries.
	 */
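	/*
	 * For example, with the default constants above, a repeatedly
	 * failing lock would see *backoff grow roughly as
	 * 1, 2, ... 20 (by 1 up to OPL_BOFF_CAP1), then
	 * 25, 30, ... 60 (by OPL_BOFF_SLEEP up to OPL_BOFF_CAP2), then
	 * 70, 80, ... up to OPL_BOFF_MAX (200), where it is capped.
	 */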
	if (*backoff < OPL_BOFF_CAP1) {
		*backoff += 1;
	} else {
		if (*backoff < OPL_BOFF_CAP2) {
			*backoff += OPL_BOFF_SLEEP;
		} else {
			*backoff += 2 * OPL_BOFF_SLEEP;
		}
		if (*backoff > OPL_BOFF_MAX) {
			*backoff = OPL_BOFF_MAX;
		}
	}
}

/*
 * The following code implements an asynchronous call to the XSCF to set up
 * the domain node name.
 */

#define	FREE_MSG(m)		kmem_free((m), NM_LEN((m)->len))

/*
 * The following three macros define all the operations on the request
 * list we are using here, and hide the details of the list
 * implementation from the code.
 */
#define	PUSH(m) \
	{ \
		(m)->next = ctl_msg.head; \
		(m)->prev = NULL; \
		if ((m)->next != NULL) \
			(m)->next->prev = (m); \
		ctl_msg.head = (m); \
	}

#define	REMOVE(m) \
	{ \
		if ((m)->prev != NULL) \
			(m)->prev->next = (m)->next; \
		else \
			ctl_msg.head = (m)->next; \
		if ((m)->next != NULL) \
			(m)->next->prev = (m)->prev; \
	}

#define	FREE_THE_TAIL(head) \
	{ \
		nm_msg_t *n_msg, *m; \
		m = (head)->next; \
		(head)->next = NULL; \
		while (m != NULL) { \
			n_msg = m->next; \
			FREE_MSG(m); \
			m = n_msg; \
		} \
	}

#define	SCF_PUTINFO(f, s, p) \
	f(KEY_ESCF, 0x01, 0, s, p)

#define	PASS2XSCF(m, r)	((r = SCF_PUTINFO(ctl_msg.scf_service_function, \
			    (m)->len, (m)->data)) == 0)

/*
 * The value of the following macro loosely depends on the
 * value of the "device busy" timeout used in the SCF driver.
 * (See pass2xscf_thread()).
 */
#define	SCF_DEVBUSY_DELAY	10

/*
 * The default number of attempts to contact the scf driver
 * if we cannot fetch any information about the timeout value
 * it uses.
 */

#define	REPEATS		4

typedef struct nm_msg {
	struct nm_msg *next;
	struct nm_msg *prev;
	int len;
	char data[1];
} nm_msg_t;

#define	NM_LEN(len)		(sizeof (nm_msg_t) + (len) - 1)

static struct ctlmsg {
	nm_msg_t	*head;
	nm_msg_t	*now_serving;
	kmutex_t	nm_lock;
	kthread_t	*nmt;
	int		cnt;
	int (*scf_service_function)(uint32_t, uint8_t,
	    uint32_t, uint32_t, void *);
} ctl_msg;

static void
post_xscf_msg(char *dp, int len)
{
	nm_msg_t *msg;

	msg = (nm_msg_t *)kmem_zalloc(NM_LEN(len), KM_SLEEP);

	bcopy(dp, msg->data, len);
	msg->len = len;

	mutex_enter(&ctl_msg.nm_lock);
	if (ctl_msg.nmt == NULL) {
		ctl_msg.nmt = thread_create(NULL, 0, pass2xscf_thread,
		    NULL, 0, &p0, TS_RUN, minclsyspri);
	}

	PUSH(msg);
	ctl_msg.cnt++;
	mutex_exit(&ctl_msg.nm_lock);
}

static void
pass2xscf_thread()
{
	nm_msg_t *msg;
	int ret;
	uint_t i, msg_sent, xscf_driver_delay;
	static uint_t repeat_cnt;
	uint_t *scf_wait_cnt;

	mutex_enter(&ctl_msg.nm_lock);

	/*
	 * Find the address of the SCF put routine if it's not done yet.
	 */
	if (ctl_msg.scf_service_function == NULL) {
		if ((ctl_msg.scf_service_function =
		    (int (*)(uint32_t, uint8_t, uint32_t, uint32_t, void *))
		    modgetsymvalue("scf_service_putinfo", 0)) == NULL) {
			cmn_err(CE_NOTE, "pass2xscf_thread: "
			    "scf_service_putinfo not found\n");
			ctl_msg.nmt = NULL;
			mutex_exit(&ctl_msg.nm_lock);
			return;
		}
	}

	/*
	 * Calculate the number of attempts to connect to the XSCF based on
	 * the scf driver delay (which is
	 * SCF_DEVBUSY_DELAY*scf_online_wait_rcnt seconds) and the value
	 * of xscf_connect_delay (the total number of seconds to wait
	 * until the XSCF gets ready).
	 */
	if (repeat_cnt == 0) {
		if ((scf_wait_cnt =
		    (uint_t *)
		    modgetsymvalue("scf_online_wait_rcnt", 0)) == NULL) {
			repeat_cnt = REPEATS;
		} else {

			xscf_driver_delay = *scf_wait_cnt *
			    SCF_DEVBUSY_DELAY;
			repeat_cnt = (xscf_connect_delay/xscf_driver_delay) + 1;
		}
	}

	while (ctl_msg.cnt != 0) {

		/*
		 * Take the very last request from the queue,
		 */
		ctl_msg.now_serving = ctl_msg.head;
		ASSERT(ctl_msg.now_serving != NULL);

		/*
		 * and discard all the others if any.
		 */
		FREE_THE_TAIL(ctl_msg.now_serving);
		ctl_msg.cnt = 1;
		mutex_exit(&ctl_msg.nm_lock);

		/*
		 * Pass the name to the XSCF.  Note, please, that we do not
		 * hold the mutex while we are doing this.
		 */
		msg_sent = 0;
		for (i = 0; i < repeat_cnt; i++) {
			if (PASS2XSCF(ctl_msg.now_serving, ret)) {
				msg_sent = 1;
				break;
			} else {
				if (ret != EBUSY) {
					cmn_err(CE_NOTE, "pass2xscf_thread:"
					    " unexpected return code"
					    " from scf_service_putinfo():"
					    " %d\n", ret);
				}
			}
		}

		if (msg_sent) {

			/*
			 * Remove the request from the list
			 */
			mutex_enter(&ctl_msg.nm_lock);
			msg = ctl_msg.now_serving;
			ctl_msg.now_serving = NULL;
			REMOVE(msg);
			ctl_msg.cnt--;
			mutex_exit(&ctl_msg.nm_lock);
			FREE_MSG(msg);
		} else {

			/*
			 * If, while we were trying to communicate with the
			 * XSCF, any other requests arrived, we will drop
			 * this one and take the latest one.  Otherwise we
			 * will try to pass this one again.
			 */
			cmn_err(CE_NOTE,
			    "pass2xscf_thread: "
			    "scf_service_putinfo "
			    "not responding\n");
		}
		mutex_enter(&ctl_msg.nm_lock);
	}

	/*
	 * The request queue is empty, exit.
	 */
	ctl_msg.nmt = NULL;
	mutex_exit(&ctl_msg.nm_lock);
}