/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/prom_plat.h>
#include <sys/promif.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/bitset.h>
#include <sys/cpupart.h>
#include <sys/disp.h>
#include <sys/hypervisor_api.h>
#include <sys/traptrace.h>
#include <sys/modctl.h>
#include <sys/ldoms.h>
#include <sys/cpu_module.h>
#include <sys/mutex_impl.h>
#include <sys/rwlock.h>
#include <sys/sdt.h>
#include <sys/cmt.h>
#include <vm/vm_dep.h>

#ifdef TRAPTRACE
int mach_htraptrace_enable = 1;
#else
int mach_htraptrace_enable = 0;
#endif
int htrap_tr0_inuse = 0;
extern char htrap_tr0[];	/* prealloc buf for boot cpu */

caddr_t mmu_fault_status_area;

extern void sfmmu_set_tsbs(void);
/*
 * CPU IDLE optimization variables/routines
 */
static int enable_halt_idle_cpus = 1;

/*
 * Defines for the idle_state_transition DTrace probe
 *
 * The probe fires when the CPU undergoes an idle state change (e.g. hv yield)
 * The argument passed is the state to which the CPU is transitioning.
 *
 * The states are defined here.
 */
#define	IDLE_STATE_NORMAL	0
#define	IDLE_STATE_YIELDED	1

#define	SUN4V_CLOCK_TICK_THRESHOLD	64
#define	SUN4V_CLOCK_TICK_NCPUS		64

extern int clock_tick_threshold;
extern int clock_tick_ncpus;

uint_t cp_haltset_fanout = 3;

/*
 * Set up the MMU fault status area and trap table for the current CPU.
 */
void
setup_trap_table(void)
{
        caddr_t mmfsa_va;
        extern caddr_t mmu_fault_status_area;

        mmfsa_va =
            mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id);

        intr_init(CPU);         /* init interrupt request free list */
        setwstate(WSTATE_KERN);
        set_mmfsa_scratchpad(mmfsa_va);
        prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
        sfmmu_set_tsbs();
}

void
phys_install_has_changed(void)
{

}

/*
 * Halt the present CPU until awoken via an interrupt
 */
static void
cpu_halt(void)
{
        cpu_t *cpup = CPU;
        processorid_t cpu_sid = cpup->cpu_seqid;
        cpupart_t *cp = cpup->cpu_part;
        int hset_update = 1;
        volatile int *p = &cpup->cpu_disp->disp_nrunnable;
        uint_t s;

        /*
         * If this CPU is online then we should notate our halting
         * by adding ourselves to the partition's halted CPU
         * bitset. This allows other CPUs to find/awaken us when
         * work becomes available.
         */
        if (CPU->cpu_flags & CPU_OFFLINE)
                hset_update = 0;

        /*
         * Add ourselves to the partition's halted CPUs bitset
         * and set our HALTED flag, if necessary.
         *
         * When a thread becomes runnable, it is placed on the queue
         * and then the halted cpu bitset is checked to determine who
         * (if anyone) should be awoken. We therefore need to first
         * add ourselves to the halted bitset, and then check if there
         * is any work available. The order is important to prevent a race
         * that can lead to work languishing on a run queue somewhere while
         * this CPU remains halted.
         *
         * Either the producing CPU will see we're halted and will awaken us,
         * or this CPU will see the work available in disp_anywork()
         */
        if (hset_update) {
                cpup->cpu_disp_flags |= CPU_DISP_HALTED;
                membar_producer();
                bitset_atomic_add(&cp->cp_haltset, cpu_sid);
        }

        /*
         * Check to make sure there's really nothing to do.
         * Work destined for this CPU may become available after
         * this check. We'll be notified through the clearing of our
         * bit in the halted CPU bitset, and a poke.
         */
        if (disp_anywork()) {
                if (hset_update) {
                        cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
                        bitset_atomic_del(&cp->cp_haltset, cpu_sid);
                }
                return;
        }

        /*
         * We're on our way to being halted. Wait until something becomes
         * runnable locally or we are awakened (i.e. removed from the halt
         * set). Note that the call to hv_cpu_yield() can return even if we
         * have nothing to do.
         *
         * Disable interrupts now, so that we'll awaken immediately
         * after halting if someone tries to poke us between now and
         * the time we actually halt.
         *
         * We check for the presence of our bit after disabling interrupts.
         * If it's cleared, we'll return. If the bit is cleared after
         * we check then the poke will pop us out of the halted state.
         * Also, if the offlined CPU has been brought back on-line, then
         * we return as well.
         *
         * The ordering of the poke and the clearing of the bit by cpu_wakeup
         * is important.
         * cpu_wakeup() must clear, then poke.
         * cpu_halt() must disable interrupts, then check for the bit.
         *
         * The check for anything locally runnable is here for performance
         * and isn't needed for correctness. disp_nrunnable ought to be
         * in our cache still, so it's inexpensive to check, and if there
         * is anything runnable we won't have to wait for the poke.
         *
         * Any interrupt will awaken the cpu from halt. Looping here
         * will filter spurious interrupts that wake us up, but don't
         * represent a need for us to head back out to idle(). This
         * will enable the idle loop to be more efficient and sleep in
         * the processor pipeline for a larger percent of the time,
         * which returns useful cycles to the peer hardware strand
         * that shares the pipeline.
         */
        s = disable_vec_intr();
        while (*p == 0 &&
            ((hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid)) ||
            (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {

                DTRACE_PROBE1(idle__state__transition,
                    uint_t, IDLE_STATE_YIELDED);
                (void) hv_cpu_yield();
                DTRACE_PROBE1(idle__state__transition,
                    uint_t, IDLE_STATE_NORMAL);

                enable_vec_intr(s);
                s = disable_vec_intr();
        }

        /*
         * We're no longer halted
         */
        enable_vec_intr(s);
        if (hset_update) {
                cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
                bitset_atomic_del(&cp->cp_haltset, cpu_sid);
        }
}

/*
 * If "cpu" is halted, then wake it up, clearing its halted bit in advance.
 * Otherwise, see if other CPUs in the cpu partition are halted and need to
 * be woken up so that they can steal the thread we placed on this CPU.
 * This function is only used on MP systems.
 */
static void
cpu_wakeup(cpu_t *cpu, int bound)
{
        uint_t cpu_found;
        processorid_t cpu_sid;
        cpupart_t *cp;

        cp = cpu->cpu_part;
        cpu_sid = cpu->cpu_seqid;
        if (bitset_in_set(&cp->cp_haltset, cpu_sid)) {
                /*
                 * Clear the halted bit for that CPU since it will be
                 * poked in a moment.
                 */
                bitset_atomic_del(&cp->cp_haltset, cpu_sid);
                /*
                 * We may find the current CPU present in the halted cpu bitset
                 * if we're in the context of an interrupt that occurred
                 * before we had a chance to clear our bit in cpu_halt().
                 * Poking ourself is obviously unnecessary, since if
                 * we're here, we're not halted.
                 */
                if (cpu != CPU)
                        poke_cpu(cpu->cpu_id);
                return;
        } else {
                /*
                 * This cpu isn't halted, but it's idle or undergoing a
                 * context switch. No need to awaken anyone else.
                 */
                if (cpu->cpu_thread == cpu->cpu_idle_thread ||
                    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
                        return;
        }

        /*
         * No need to wake up other CPUs if this is for a bound thread.
         */
        if (bound)
                return;

        /*
         * The CPU specified for wakeup isn't currently halted, so check
         * to see if there are any other halted CPUs in the partition,
         * and if there are then awaken one.
         */
        do {
                cpu_found = bitset_find(&cp->cp_haltset);
                if (cpu_found == (uint_t)-1)
                        return;
        } while (bitset_atomic_test_and_del(&cp->cp_haltset, cpu_found) < 0);

        if (cpu_found != CPU->cpu_seqid)
                poke_cpu(cpu_seq[cpu_found]->cpu_id);
}

/*
 * Install the halt/wakeup routines as the idle and dispatcher enqueue
 * hooks when halting idle CPUs is enabled.
 */
void
mach_cpu_halt_idle(void)
{
        if (enable_halt_idle_cpus) {
                idle_cpu = cpu_halt;
                disp_enq_thread = cpu_wakeup;
        }
}

/*
 * Allocate the MMU fault status area (one MMFSA_SIZE entry per possible
 * CPU) from the nucleus data area.
 */
int
ndata_alloc_mmfsa(struct memlist *ndata)
{
        size_t size;

        size = MMFSA_SIZE * max_ncpus;
        mmu_fault_status_area = ndata_alloc(ndata, size, ecache_alignsize);
        if (mmu_fault_status_area == NULL)
                return (-1);
        return (0);
}

void
mach_memscrub(void)
{
        /* no memscrub support for sun4v for now */
}

void
mach_fpras()
{
        /* no fpras support for sun4v for now */
}

void
mach_hw_copy_limit(void)
{
        /* HW copy limits set by individual CPU module */
}

/*
 * We need to enable soft ring functionality on Niagara platforms since
 * one strand can't handle interrupts for a 1Gb NIC. So set the tunable
 * mac_soft_ring_enable by default on this platform.
 * mac_soft_ring_enable variable is defined in space.c and used by MAC
 * module. This tunable in concert with mac_soft_ring_count (declared
 * in mac.h) will configure the number of fanout soft rings for a link.
 */
extern boolean_t mac_soft_ring_enable;
void
startup_platform(void)
{
        mac_soft_ring_enable = B_TRUE;
        if (clock_tick_threshold == 0)
                clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD;
        if (clock_tick_ncpus == 0)
                clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS;
        /* set per-platform constants for mutex_backoff */
        mutex_backoff_base = 1;
        mutex_cap_factor = 4;
        if (l2_cache_node_count() > 1) {
                /* VF for example */
                mutex_backoff_base = 2;
                mutex_cap_factor = 64;
        }
        rw_lock_backoff = default_lock_backoff;
        rw_lock_delay = default_lock_delay;
}

/*
 * This function sets up the hypervisor traptrace buffer.
 * This routine is called by the boot cpu only.
 */
void
mach_htraptrace_setup(int cpuid)
{
        TRAP_TRACE_CTL *ctlp;
        int bootcpuid = getprocessorid();       /* invoked on boot cpu only */

        if (mach_htraptrace_enable && ((cpuid != bootcpuid) ||
            !htrap_tr0_inuse)) {
                ctlp = &trap_trace_ctl[cpuid];
                ctlp->d.hvaddr_base = (cpuid == bootcpuid) ? htrap_tr0 :
                    contig_mem_alloc_align(HTRAP_TSIZE, HTRAP_TSIZE);
                if (ctlp->d.hvaddr_base == NULL) {
                        ctlp->d.hlimit = 0;
                        ctlp->d.hpaddr_base = NULL;
                        cmn_err(CE_WARN, "!cpu%d: failed to allocate HV "
                            "traptrace buffer", cpuid);
                } else {
                        ctlp->d.hlimit = HTRAP_TSIZE;
                        ctlp->d.hpaddr_base = va_to_pa(ctlp->d.hvaddr_base);
                }
        }
}

/*
 * This function enables or disables hypervisor traptracing.
 */
void
mach_htraptrace_configure(int cpuid)
{
        uint64_t ret;
        uint64_t prev_buf, prev_bufsize;
        uint64_t prev_enable;
        uint64_t size;
        TRAP_TRACE_CTL *ctlp;

        ctlp = &trap_trace_ctl[cpuid];
        if (mach_htraptrace_enable) {
                if ((ctlp->d.hvaddr_base != NULL) &&
                    ((ctlp->d.hvaddr_base != htrap_tr0) ||
                    (!htrap_tr0_inuse))) {
                        ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
                        if ((ret == H_EOK) && (prev_bufsize != 0)) {
                                cmn_err(CE_CONT,
                                    "!cpu%d: previous HV traptrace buffer of "
                                    "size 0x%lx at address 0x%lx", cpuid,
                                    prev_bufsize, prev_buf);
                        }

                        ret = hv_ttrace_buf_conf(ctlp->d.hpaddr_base,
                            ctlp->d.hlimit /
                            (sizeof (struct htrap_trace_record)), &size);
                        if (ret == H_EOK) {
                                ret = hv_ttrace_enable(
                                    (uint64_t)TRAP_TENABLE_ALL, &prev_enable);
                                if (ret != H_EOK) {
                                        cmn_err(CE_WARN,
                                            "!cpu%d: HV traptracing not "
                                            "enabled, ta: 0x%x returned error: "
                                            "%ld", cpuid, TTRACE_ENABLE, ret);
                                } else {
                                        if (ctlp->d.hvaddr_base == htrap_tr0)
                                                htrap_tr0_inuse = 1;
                                }
                        } else {
                                cmn_err(CE_WARN,
                                    "!cpu%d: HV traptrace buffer not "
                                    "configured, ta: 0x%x returned error: %ld",
                                    cpuid, TTRACE_BUF_CONF, ret);
                        }
                        /*
                         * set hvaddr_base to NULL when traptrace buffer
                         * registration fails
                         */
                        if (ret != H_EOK) {
                                ctlp->d.hvaddr_base = NULL;
                                ctlp->d.hlimit = 0;
                                ctlp->d.hpaddr_base = NULL;
                        }
                }
        } else {
                ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
                if ((ret == H_EOK) && (prev_bufsize != 0)) {
                        ret = hv_ttrace_enable((uint64_t)TRAP_TDISABLE_ALL,
                            &prev_enable);
                        if (ret == H_EOK) {
                                if (ctlp->d.hvaddr_base == htrap_tr0)
                                        htrap_tr0_inuse = 0;
                                ctlp->d.hvaddr_base = NULL;
                                ctlp->d.hlimit = 0;
                                ctlp->d.hpaddr_base = NULL;
                        } else
                                cmn_err(CE_WARN,
                                    "!cpu%d: HV traptracing is not disabled, "
                                    "ta: 0x%x returned error: %ld",
                                    cpuid, TTRACE_ENABLE, ret);
                }
        }
}

/*
 * This function cleans up the hypervisor traptrace buffer.
 */
void
mach_htraptrace_cleanup(int cpuid)
{
        if (mach_htraptrace_enable) {
                TRAP_TRACE_CTL *ctlp;
                caddr_t httrace_buf_va;

                ASSERT(cpuid < max_ncpus);
                ctlp = &trap_trace_ctl[cpuid];
                httrace_buf_va = ctlp->d.hvaddr_base;
                if (httrace_buf_va == htrap_tr0) {
                        bzero(httrace_buf_va, HTRAP_TSIZE);
                } else if (httrace_buf_va != NULL) {
                        contig_mem_free(httrace_buf_va, HTRAP_TSIZE);
                }
                ctlp->d.hvaddr_base = NULL;
                ctlp->d.hlimit = 0;
                ctlp->d.hpaddr_base = NULL;
        }
}

/*
 * Load any required machine class (sun4v) specific drivers.
 */
void
load_mach_drivers(void)
{
        /*
         * We don't want to load these LDOMs-specific
         * modules if domaining is not supported. Also,
         * we must be able to run on non-LDOMs firmware.
         */
        if (!domaining_supported())
                return;

        /*
         * Load the core domain services module
         */
        if (modload("misc", "ds") == -1)
                cmn_err(CE_NOTE, "!'ds' module failed to load");

        /*
         * Load the rest of the domain services
         */
        if (modload("misc", "fault_iso") == -1)
                cmn_err(CE_NOTE, "!'fault_iso' module failed to load");

        if (modload("misc", "platsvc") == -1)
                cmn_err(CE_NOTE, "!'platsvc' module failed to load");

        if (domaining_enabled() && modload("misc", "dr_cpu") == -1)
                cmn_err(CE_NOTE, "!'dr_cpu' module failed to load");

        if (modload("misc", "dr_io") == -1)
                cmn_err(CE_NOTE, "!'dr_io' module failed to load");

        if (modload("misc", "dr_mem") == -1)
                cmn_err(CE_NOTE, "!'dr_mem' module failed to load");

        /*
         * Attempt to attach any virtual device servers. These
         * drivers must be loaded at start of day so that they
         * can respond to any updates to the machine description.
         *
         * Since it is quite likely that a domain will not support
         * one or more of these servers, failures are ignored.
         */

        /* virtual disk server */
        (void) i_ddi_attach_hw_nodes("vds");

        /* virtual network switch */
        (void) i_ddi_attach_hw_nodes("vsw");

        /* virtual console concentrator */
        (void) i_ddi_attach_hw_nodes("vcc");
}

void
set_platform_defaults(void)
{
        /*
         * Allow at most one context domain per 8 CPUs, which is ample for
         * good performance. Do not make this too large, because it
         * increases the space consumed in the per-process sfmmu structure.
         */
        if (max_mmu_ctxdoms == 0)
                max_mmu_ctxdoms = (NCPU + 7) / 8;
}