/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/prom_plat.h>
#include <sys/promif.h>
#include <sys/vm.h>
#include <sys/cpu.h>
#include <sys/bitset.h>
#include <sys/cpupart.h>
#include <sys/disp.h>
#include <sys/hypervisor_api.h>
#include <sys/traptrace.h>
#include <sys/modctl.h>
#include <sys/ldoms.h>
#include <sys/cpu_module.h>
#include <sys/mutex_impl.h>
#include <sys/rwlock.h>
#include <sys/sdt.h>
#include <sys/cmt.h>
#include <vm/vm_dep.h>

#ifdef TRAPTRACE
int mach_htraptrace_enable = 1;
#else
int mach_htraptrace_enable = 0;
#endif
int htrap_tr0_inuse = 0;
extern char htrap_tr0[];	/* prealloc buf for boot cpu */

caddr_t	mmu_fault_status_area;

extern void sfmmu_set_tsbs(void);
/*
 * CPU IDLE optimization variables/routines
 */
static int enable_halt_idle_cpus = 1;

/*
 * Defines for the idle_state_transition DTrace probe
 *
 * The probe fires when the CPU undergoes an idle state change (e.g. hv yield)
 * The argument passed is the state to which the CPU is transitioning.
 *
 * The states are defined here.
 */
#define	IDLE_STATE_NORMAL	0
#define	IDLE_STATE_YIELDED	1

#define	SUN4V_CLOCK_TICK_THRESHOLD	64
#define	SUN4V_CLOCK_TICK_NCPUS		64

extern int	clock_tick_threshold;
extern int	clock_tick_ncpus;

/*
 * Set up this CPU's trap handling: initialize the interrupt request
 * free list, set the kernel window state, point the MMFSA scratchpad
 * at this CPU's fault status area, hand the trap table and MMFSA to
 * the PROM, and set the TSBs.
 */
void
setup_trap_table(void)
{
	caddr_t mmfsa_va;
	extern	caddr_t mmu_fault_status_area;
	mmfsa_va =
	    mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id);

	intr_init(CPU);		/* init interrupt request free list */
	setwstate(WSTATE_KERN);
	set_mmfsa_scratchpad(mmfsa_va);
	prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
	sfmmu_set_tsbs();
}

void
phys_install_has_changed(void)
{

}

/*
 * Halt the present CPU until awoken via an interrupt
 */
static void
cpu_halt(void)
{
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	int hset_update = 1;
	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
	uint_t s;

	/*
	 * If this CPU is online then we should notate our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitset. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (CPU->cpu_flags & CPU_OFFLINE)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitset
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpu bitset is checked to determine who
	 * (if anyone) should be awoken. We therefore need to first
	 * add ourselves to the halted bitset, and then check if there
	 * is any work available. The order is important to prevent a race
	 * that can lead to work languishing on a run queue somewhere while
	 * this CPU remains halted.
	 *
	 * Either the producing CPU will see we're halted and will awaken us,
	 * or this CPU will see the work available in disp_anywork().
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		membar_producer();
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitset, and a poke.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted. Wait until something becomes
	 * runnable locally or we are awakened (i.e. removed from the halt
	 * set). Note that the call to hv_cpu_yield() can return even if we
	 * have nothing to do.
	 *
	 * Disable interrupts now, so that we'll awaken immediately
	 * after halting if someone tries to poke us between now and
	 * the time we actually halt.
	 *
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check then the poke will pop us out of the halted state.
	 * Also, if the offlined CPU has been brought back on-line, then
	 * we return as well.
	 *
	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
	 * is important.
	 * cpu_wakeup() must clear, then poke.
	 * cpu_halt() must disable interrupts, then check for the bit.
	 *
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 *
	 * Any interrupt will awaken the cpu from halt. Looping here
	 * will filter spurious interrupts that wake us up, but don't
	 * represent a need for us to head back out to idle(). This
	 * will enable the idle loop to be more efficient and sleep in
	 * the processor pipeline for a larger percent of the time,
	 * which returns useful cycles to the peer hardware strand
	 * that shares the pipeline.
	 */
	s = disable_vec_intr();
	while (*p == 0 &&
	    ((hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid)) ||
	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {

		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_YIELDED);
		(void) hv_cpu_yield();
		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_NORMAL);

		enable_vec_intr(s);
		s = disable_vec_intr();
	}

	/*
	 * We're no longer halted
	 */
	enable_vec_intr(s);
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}

/*
 * If "cpu" is halted, then wake it up clearing its halted bit in advance.
 * Otherwise, see if other CPUs in the cpu partition are halted and need to
 * be woken up so that they can steal the thread we placed on this CPU.
 * This function is only used on MP systems.
 */
static void
cpu_wakeup(cpu_t *cpu, int bound)
{
	uint_t		cpu_found;
	processorid_t	cpu_sid;
	cpupart_t	*cp;

	cp = cpu->cpu_part;
	cpu_sid = cpu->cpu_seqid;
	if (bitset_in_set(&cp->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		/*
		 * We may find the current CPU present in the halted cpu bitset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_halt().
		 * Poking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cpu != CPU)
			poke_cpu(cpu->cpu_id);
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if this is for a bound thread.
	 */
	if (bound)
		return;

	/*
	 * The CPU specified for wakeup isn't currently halted, so check
	 * to see if there are any other halted CPUs in the partition,
	 * and if there are then awaken one.
	 */
	do {
		cpu_found = bitset_find(&cp->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;
	} while (bitset_atomic_test_and_del(&cp->cp_haltset, cpu_found) < 0);

	if (cpu_found != CPU->cpu_seqid)
		poke_cpu(cpu_seq[cpu_found]->cpu_id);
}

void
mach_cpu_halt_idle(void)
{
	if (enable_halt_idle_cpus) {
		idle_cpu = cpu_halt;
		disp_enq_thread = cpu_wakeup;
	}
}

int
ndata_alloc_mmfsa(struct memlist *ndata)
{
	size_t	size;

	size = MMFSA_SIZE * max_ncpus;
	mmu_fault_status_area = ndata_alloc(ndata, size, ecache_alignsize);
	if (mmu_fault_status_area == NULL)
		return (-1);
	return (0);
}

void
mach_memscrub(void)
{
	/* no memscrub support for sun4v for now */
}

void
mach_fpras()
{
	/* no fpras support for sun4v for now */
}

void
mach_hw_copy_limit(void)
{
	/* HW copy limits set by individual CPU module */
}

/*
 * We need to enable soft ring functionality on Niagara platforms since
 * one strand can't handle interrupts for a 1Gb NIC. So set the tunable
 * mac_soft_ring_enable by default on this platform.
 * The mac_soft_ring_enable variable is defined in space.c and used by the
 * MAC module. This tunable in concert with mac_soft_ring_count (declared
 * in mac.h) will configure the number of fanout soft rings for a link.
 */
extern boolean_t mac_soft_ring_enable;
void
startup_platform(void)
{
	mac_soft_ring_enable = B_TRUE;
	if (clock_tick_threshold == 0)
		clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD;
	if (clock_tick_ncpus == 0)
		clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS;
	/* set per-platform constants for mutex_backoff */
	mutex_backoff_base = 1;
	mutex_cap_factor = 4;
	if (l2_cache_node_count() > 1) {
		/* VF for example */
		mutex_backoff_base = 2;
		mutex_cap_factor = 64;
	}
	rw_lock_backoff = default_lock_backoff;
	rw_lock_delay = default_lock_delay;
}

/*
 * This function sets up the hypervisor traptrace buffer.
 * This routine is called by the boot cpu only.
 */
void
mach_htraptrace_setup(int cpuid)
{
	TRAP_TRACE_CTL	*ctlp;
	int bootcpuid = getprocessorid();	/* invoked on boot cpu only */

	if (mach_htraptrace_enable && ((cpuid != bootcpuid) ||
	    !htrap_tr0_inuse)) {
		ctlp = &trap_trace_ctl[cpuid];
		ctlp->d.hvaddr_base = (cpuid == bootcpuid) ? htrap_tr0 :
		    contig_mem_alloc_align(HTRAP_TSIZE, HTRAP_TSIZE);
		if (ctlp->d.hvaddr_base == NULL) {
			ctlp->d.hlimit = 0;
			ctlp->d.hpaddr_base = NULL;
			cmn_err(CE_WARN, "!cpu%d: failed to allocate HV "
			    "traptrace buffer", cpuid);
		} else {
			ctlp->d.hlimit = HTRAP_TSIZE;
			ctlp->d.hpaddr_base = va_to_pa(ctlp->d.hvaddr_base);
		}
	}
}

/*
 * This function enables or disables hypervisor traptracing.
 */
void
mach_htraptrace_configure(int cpuid)
{
	uint64_t ret;
	uint64_t prev_buf, prev_bufsize;
	uint64_t prev_enable;
	uint64_t size;
	TRAP_TRACE_CTL	*ctlp;

	ctlp = &trap_trace_ctl[cpuid];
	if (mach_htraptrace_enable) {
		if ((ctlp->d.hvaddr_base != NULL) &&
		    ((ctlp->d.hvaddr_base != htrap_tr0) ||
		    (!htrap_tr0_inuse))) {
			ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
			if ((ret == H_EOK) && (prev_bufsize != 0)) {
				cmn_err(CE_CONT,
				    "!cpu%d: previous HV traptrace buffer of "
				    "size 0x%lx at address 0x%lx", cpuid,
				    prev_bufsize, prev_buf);
			}

			ret = hv_ttrace_buf_conf(ctlp->d.hpaddr_base,
			    ctlp->d.hlimit /
			    (sizeof (struct htrap_trace_record)), &size);
			if (ret == H_EOK) {
				ret = hv_ttrace_enable(
				    (uint64_t)TRAP_TENABLE_ALL, &prev_enable);
				if (ret != H_EOK) {
					cmn_err(CE_WARN,
					    "!cpu%d: HV traptracing not "
					    "enabled, ta: 0x%x returned error: "
					    "%ld", cpuid, TTRACE_ENABLE, ret);
				} else {
					if (ctlp->d.hvaddr_base == htrap_tr0)
						htrap_tr0_inuse = 1;
				}
			} else {
				cmn_err(CE_WARN,
				    "!cpu%d: HV traptrace buffer not "
				    "configured, ta: 0x%x returned error: %ld",
				    cpuid, TTRACE_BUF_CONF, ret);
			}
			/*
			 * set hvaddr_base to NULL when traptrace buffer
			 * registration fails
			 */
			if (ret != H_EOK) {
				ctlp->d.hvaddr_base = NULL;
				ctlp->d.hlimit = 0;
				ctlp->d.hpaddr_base = NULL;
			}
		}
	} else {
		ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize);
		if ((ret == H_EOK) && (prev_bufsize != 0)) {
			ret = hv_ttrace_enable((uint64_t)TRAP_TDISABLE_ALL,
			    &prev_enable);
			if (ret == H_EOK) {
				if (ctlp->d.hvaddr_base == htrap_tr0)
					htrap_tr0_inuse = 0;
				ctlp->d.hvaddr_base = NULL;
				ctlp->d.hlimit = 0;
				ctlp->d.hpaddr_base = NULL;
			} else
				cmn_err(CE_WARN,
				    "!cpu%d: HV traptracing is not disabled, "
				    "ta: 0x%x returned error: %ld",
				    cpuid, TTRACE_ENABLE, ret);
		}
	}
}

/*
 * This function cleans up the hypervisor traptrace buffer.
 */
void
mach_htraptrace_cleanup(int cpuid)
{
	if (mach_htraptrace_enable) {
		TRAP_TRACE_CTL *ctlp;
		caddr_t httrace_buf_va;

		ASSERT(cpuid < max_ncpus);
		ctlp = &trap_trace_ctl[cpuid];
		httrace_buf_va = ctlp->d.hvaddr_base;
		if (httrace_buf_va == htrap_tr0) {
			bzero(httrace_buf_va, HTRAP_TSIZE);
		} else if (httrace_buf_va != NULL) {
			contig_mem_free(httrace_buf_va, HTRAP_TSIZE);
		}
		ctlp->d.hvaddr_base = NULL;
		ctlp->d.hlimit = 0;
		ctlp->d.hpaddr_base = NULL;
	}
}

/*
 * Load any required machine class (sun4v) specific drivers.
 */
void
load_mach_drivers(void)
{
	/*
	 * We don't want to load these LDOMs-specific
	 * modules if domaining is not supported.  Also,
	 * we must be able to run on non-LDOMs firmware.
	 */
	if (!domaining_supported())
		return;

	/*
	 * Load the core domain services module
	 */
	if (modload("misc", "ds") == -1)
		cmn_err(CE_NOTE, "!'ds' module failed to load");

	/*
	 * Load the rest of the domain services
	 */
	if (modload("misc", "fault_iso") == -1)
		cmn_err(CE_NOTE, "!'fault_iso' module failed to load");

	if (modload("misc", "platsvc") == -1)
		cmn_err(CE_NOTE, "!'platsvc' module failed to load");

	if (domaining_enabled() && modload("misc", "dr_cpu") == -1)
		cmn_err(CE_NOTE, "!'dr_cpu' module failed to load");

	if (modload("misc", "dr_io") == -1)
		cmn_err(CE_NOTE, "!'dr_io' module failed to load");

	if (modload("misc", "dr_mem") == -1)
		cmn_err(CE_NOTE, "!'dr_mem' module failed to load");

	/*
	 * Attempt to attach any virtual device servers. These
	 * drivers must be loaded at start of day so that they
	 * can respond to any updates to the machine description.
	 *
	 * Since it is quite likely that a domain will not support
	 * one or more of these servers, failures are ignored.
	 */

	/* virtual disk server */
	(void) i_ddi_attach_hw_nodes("vds");

	/* virtual network switch */
	(void) i_ddi_attach_hw_nodes("vsw");

	/* virtual console concentrator */
	(void) i_ddi_attach_hw_nodes("vcc");
}

void
set_platform_defaults(void)
{
	/*
	 * Allow at most one context domain per 8 CPUs, which is ample for
	 * good performance.  Do not make this too large, because it
	 * increases the space consumed in the per-process sfmmu structure.
	 */
	if (max_mmu_ctxdoms == 0)
		max_mmu_ctxdoms = (NCPU + 7) / 8;
}