1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/machsystm.h> 30 #include <sys/archsystm.h> 31 #include <sys/prom_plat.h> 32 #include <sys/promif.h> 33 #include <sys/vm.h> 34 #include <sys/cpu.h> 35 #include <sys/atomic.h> 36 #include <sys/cpupart.h> 37 #include <sys/disp.h> 38 #include <sys/hypervisor_api.h> 39 #include <sys/traptrace.h> 40 #include <sys/modctl.h> 41 #include <sys/ldoms.h> 42 #include <sys/cpu_module.h> 43 #include <sys/mutex_impl.h> 44 #include <sys/rwlock.h> 45 #include <vm/vm_dep.h> 46 #include <sys/sdt.h> 47 48 #ifdef TRAPTRACE 49 int mach_htraptrace_enable = 1; 50 #else 51 int mach_htraptrace_enable = 0; 52 #endif 53 int htrap_tr0_inuse = 0; 54 extern char htrap_tr0[]; /* prealloc buf for boot cpu */ 55 56 caddr_t mmu_fault_status_area; 57 58 extern void sfmmu_set_tsbs(void); 59 /* 60 * CPU IDLE optimization variables/routines 61 */ 62 static int enable_halt_idle_cpus = 1; 63 64 /* 65 * Defines for the idle_state_transition DTrace probe 66 * 67 * The probe fires when the CPU undergoes an idle state change (e.g. hv yield) 68 * The agument passed is the state to which the CPU is transitioning. 69 * 70 * The states are defined here. 71 */ 72 #define IDLE_STATE_NORMAL 0 73 #define IDLE_STATE_YIELDED 1 74 75 #define SUN4V_CLOCK_TICK_THRESHOLD 64 76 #define SUN4V_CLOCK_TICK_NCPUS 64 77 78 extern int clock_tick_threshold; 79 extern int clock_tick_ncpus; 80 81 void 82 setup_trap_table(void) 83 { 84 caddr_t mmfsa_va; 85 extern caddr_t mmu_fault_status_area; 86 mmfsa_va = 87 mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id); 88 89 intr_init(CPU); /* init interrupt request free list */ 90 setwstate(WSTATE_KERN); 91 set_mmfsa_scratchpad(mmfsa_va); 92 prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va)); 93 sfmmu_set_tsbs(); 94 } 95 96 void 97 phys_install_has_changed(void) 98 { 99 100 } 101 102 /* 103 * Halt the present CPU until awoken via an interrupt 104 */ 105 static void 106 cpu_halt(void) 107 { 108 cpu_t *cpup = CPU; 109 processorid_t cpun = cpup->cpu_id; 110 cpupart_t *cp = cpup->cpu_part; 111 int hset_update = 1; 112 volatile int *p = &cpup->cpu_disp->disp_nrunnable; 113 uint_t s; 114 115 /* 116 * If this CPU is online, and there's multiple CPUs 117 * in the system, then we should notate our halting 118 * by adding ourselves to the partition's halted CPU 119 * bitmap. This allows other CPUs to find/awaken us when 120 * work becomes available. 121 */ 122 if (CPU->cpu_flags & CPU_OFFLINE || ncpus == 1) 123 hset_update = 0; 124 125 /* 126 * Add ourselves to the partition's halted CPUs bitmask 127 * and set our HALTED flag, if necessary. 128 * 129 * When a thread becomes runnable, it is placed on the queue 130 * and then the halted cpuset is checked to determine who 131 * (if anyone) should be awoken. We therefore need to first 132 * add ourselves to the halted cpuset, and then check if there 133 * is any work available. 134 */ 135 if (hset_update) { 136 cpup->cpu_disp_flags |= CPU_DISP_HALTED; 137 membar_producer(); 138 CPUSET_ATOMIC_ADD(cp->cp_mach->mc_haltset, cpun); 139 } 140 141 /* 142 * Check to make sure there's really nothing to do. 143 * Work destined for this CPU may become available after 144 * this check. We'll be notified through the clearing of our 145 * bit in the halted CPU bitmask, and a poke. 146 */ 147 if (disp_anywork()) { 148 if (hset_update) { 149 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 150 CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun); 151 } 152 return; 153 } 154 155 /* 156 * We're on our way to being halted. Wait until something becomes 157 * runnable locally or we are awaken (i.e. removed from the halt set). 158 * Note that the call to hv_cpu_yield() can return even if we have 159 * nothing to do. 160 * 161 * Disable interrupts now, so that we'll awaken immediately 162 * after halting if someone tries to poke us between now and 163 * the time we actually halt. 164 * 165 * We check for the presence of our bit after disabling interrupts. 166 * If it's cleared, we'll return. If the bit is cleared after 167 * we check then the poke will pop us out of the halted state. 168 * Also, if the offlined CPU has been brought back on-line, then 169 * we return as well. 170 * 171 * The ordering of the poke and the clearing of the bit by cpu_wakeup 172 * is important. 173 * cpu_wakeup() must clear, then poke. 174 * cpu_halt() must disable interrupts, then check for the bit. 175 * 176 * The check for anything locally runnable is here for performance 177 * and isn't needed for correctness. disp_nrunnable ought to be 178 * in our cache still, so it's inexpensive to check, and if there 179 * is anything runnable we won't have to wait for the poke. 180 * 181 */ 182 s = disable_vec_intr(); 183 while (*p == 0 && 184 ((hset_update && CPU_IN_SET(cp->cp_mach->mc_haltset, cpun)) || 185 (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) { 186 187 DTRACE_PROBE1(idle__state__transition, 188 uint_t, IDLE_STATE_YIELDED); 189 (void) hv_cpu_yield(); 190 DTRACE_PROBE1(idle__state__transition, 191 uint_t, IDLE_STATE_NORMAL); 192 193 enable_vec_intr(s); 194 s = disable_vec_intr(); 195 } 196 197 /* 198 * We're no longer halted 199 */ 200 enable_vec_intr(s); 201 if (hset_update) { 202 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 203 CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun); 204 } 205 } 206 207 /* 208 * If "cpu" is halted, then wake it up clearing its halted bit in advance. 209 * Otherwise, see if other CPUs in the cpu partition are halted and need to 210 * be woken up so that they can steal the thread we placed on this CPU. 211 * This function is only used on MP systems. 212 */ 213 static void 214 cpu_wakeup(cpu_t *cpu, int bound) 215 { 216 uint_t cpu_found; 217 int result; 218 cpupart_t *cp; 219 220 cp = cpu->cpu_part; 221 if (CPU_IN_SET(cp->cp_mach->mc_haltset, cpu->cpu_id)) { 222 /* 223 * Clear the halted bit for that CPU since it will be 224 * poked in a moment. 225 */ 226 CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpu->cpu_id); 227 /* 228 * We may find the current CPU present in the halted cpuset 229 * if we're in the context of an interrupt that occurred 230 * before we had a chance to clear our bit in cpu_halt(). 231 * Poking ourself is obviously unnecessary, since if 232 * we're here, we're not halted. 233 */ 234 if (cpu != CPU) 235 poke_cpu(cpu->cpu_id); 236 return; 237 } else { 238 /* 239 * This cpu isn't halted, but it's idle or undergoing a 240 * context switch. No need to awaken anyone else. 241 */ 242 if (cpu->cpu_thread == cpu->cpu_idle_thread || 243 cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL) 244 return; 245 } 246 247 /* 248 * No need to wake up other CPUs if the thread we just enqueued 249 * is bound. 250 */ 251 if (bound) 252 return; 253 254 /* 255 * See if there's any other halted CPUs. If there are, then 256 * select one, and awaken it. 257 * It's possible that after we find a CPU, somebody else 258 * will awaken it before we get the chance. 259 * In that case, look again. 260 */ 261 do { 262 CPUSET_FIND(cp->cp_mach->mc_haltset, cpu_found); 263 if (cpu_found == CPUSET_NOTINSET) 264 return; 265 266 ASSERT(cpu_found >= 0 && cpu_found < NCPU); 267 CPUSET_ATOMIC_XDEL(cp->cp_mach->mc_haltset, cpu_found, result); 268 } while (result < 0); 269 270 if (cpu_found != CPU->cpu_id) 271 poke_cpu(cpu_found); 272 } 273 274 void 275 mach_cpu_halt_idle() 276 { 277 if (enable_halt_idle_cpus) { 278 idle_cpu = cpu_halt; 279 disp_enq_thread = cpu_wakeup; 280 } 281 } 282 283 int 284 ndata_alloc_mmfsa(struct memlist *ndata) 285 { 286 size_t size; 287 288 size = MMFSA_SIZE * max_ncpus; 289 mmu_fault_status_area = ndata_alloc(ndata, size, ecache_alignsize); 290 if (mmu_fault_status_area == NULL) 291 return (-1); 292 return (0); 293 } 294 295 void 296 mach_memscrub(void) 297 { 298 /* no memscrub support for sun4v for now */ 299 } 300 301 void 302 mach_fpras() 303 { 304 /* no fpras support for sun4v for now */ 305 } 306 307 void 308 mach_hw_copy_limit(void) 309 { 310 /* HW copy limits set by individual CPU module */ 311 } 312 313 /* 314 * We need to enable soft ring functionality on Niagara platform since 315 * one strand can't handle interrupts for a 1Gb NIC. Set the tunable 316 * ip_squeue_soft_ring by default on this platform. We can also set 317 * ip_threads_per_cpu to track number of threads per core. The variables 318 * themselves are defined in space.c and used by IP module 319 */ 320 extern uint_t ip_threads_per_cpu; 321 extern boolean_t ip_squeue_soft_ring; 322 void 323 startup_platform(void) 324 { 325 ip_squeue_soft_ring = B_TRUE; 326 if (clock_tick_threshold == 0) 327 clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD; 328 if (clock_tick_ncpus == 0) 329 clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS; 330 /* set per-platform constants for mutex_backoff */ 331 mutex_backoff_base = 1; 332 mutex_cap_factor = 4; 333 if (l2_cache_node_count() > 1) { 334 /* VF for example */ 335 mutex_backoff_base = 2; 336 mutex_cap_factor = 64; 337 } 338 rw_lock_backoff = default_lock_backoff; 339 rw_lock_delay = default_lock_delay; 340 } 341 342 /* 343 * This function sets up hypervisor traptrace buffer 344 * This routine is called by the boot cpu only 345 */ 346 void 347 mach_htraptrace_setup(int cpuid) 348 { 349 TRAP_TRACE_CTL *ctlp; 350 int bootcpuid = getprocessorid(); /* invoked on boot cpu only */ 351 352 if (mach_htraptrace_enable && ((cpuid != bootcpuid) || 353 !htrap_tr0_inuse)) { 354 ctlp = &trap_trace_ctl[cpuid]; 355 ctlp->d.hvaddr_base = (cpuid == bootcpuid) ? htrap_tr0 : 356 contig_mem_alloc_align(HTRAP_TSIZE, HTRAP_TSIZE); 357 if (ctlp->d.hvaddr_base == NULL) { 358 ctlp->d.hlimit = 0; 359 ctlp->d.hpaddr_base = NULL; 360 cmn_err(CE_WARN, "!cpu%d: failed to allocate HV " 361 "traptrace buffer", cpuid); 362 } else { 363 ctlp->d.hlimit = HTRAP_TSIZE; 364 ctlp->d.hpaddr_base = va_to_pa(ctlp->d.hvaddr_base); 365 } 366 } 367 } 368 369 /* 370 * This function enables or disables the hypervisor traptracing 371 */ 372 void 373 mach_htraptrace_configure(int cpuid) 374 { 375 uint64_t ret; 376 uint64_t prev_buf, prev_bufsize; 377 uint64_t prev_enable; 378 uint64_t size; 379 TRAP_TRACE_CTL *ctlp; 380 381 ctlp = &trap_trace_ctl[cpuid]; 382 if (mach_htraptrace_enable) { 383 if ((ctlp->d.hvaddr_base != NULL) && 384 ((ctlp->d.hvaddr_base != htrap_tr0) || 385 (!htrap_tr0_inuse))) { 386 ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize); 387 if ((ret == H_EOK) && (prev_bufsize != 0)) { 388 cmn_err(CE_CONT, 389 "!cpu%d: previous HV traptrace buffer of " 390 "size 0x%lx at address 0x%lx", cpuid, 391 prev_bufsize, prev_buf); 392 } 393 394 ret = hv_ttrace_buf_conf(ctlp->d.hpaddr_base, 395 ctlp->d.hlimit / 396 (sizeof (struct htrap_trace_record)), &size); 397 if (ret == H_EOK) { 398 ret = hv_ttrace_enable(\ 399 (uint64_t)TRAP_TENABLE_ALL, &prev_enable); 400 if (ret != H_EOK) { 401 cmn_err(CE_WARN, 402 "!cpu%d: HV traptracing not " 403 "enabled, ta: 0x%x returned error: " 404 "%ld", cpuid, TTRACE_ENABLE, ret); 405 } else { 406 if (ctlp->d.hvaddr_base == htrap_tr0) 407 htrap_tr0_inuse = 1; 408 } 409 } else { 410 cmn_err(CE_WARN, 411 "!cpu%d: HV traptrace buffer not " 412 "configured, ta: 0x%x returned error: %ld", 413 cpuid, TTRACE_BUF_CONF, ret); 414 } 415 /* 416 * set hvaddr_base to NULL when traptrace buffer 417 * registration fails 418 */ 419 if (ret != H_EOK) { 420 ctlp->d.hvaddr_base = NULL; 421 ctlp->d.hlimit = 0; 422 ctlp->d.hpaddr_base = NULL; 423 } 424 } 425 } else { 426 ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize); 427 if ((ret == H_EOK) && (prev_bufsize != 0)) { 428 ret = hv_ttrace_enable((uint64_t)TRAP_TDISABLE_ALL, 429 &prev_enable); 430 if (ret == H_EOK) { 431 if (ctlp->d.hvaddr_base == htrap_tr0) 432 htrap_tr0_inuse = 0; 433 ctlp->d.hvaddr_base = NULL; 434 ctlp->d.hlimit = 0; 435 ctlp->d.hpaddr_base = NULL; 436 } else 437 cmn_err(CE_WARN, 438 "!cpu%d: HV traptracing is not disabled, " 439 "ta: 0x%x returned error: %ld", 440 cpuid, TTRACE_ENABLE, ret); 441 } 442 } 443 } 444 445 /* 446 * This function cleans up the hypervisor traptrace buffer 447 */ 448 void 449 mach_htraptrace_cleanup(int cpuid) 450 { 451 if (mach_htraptrace_enable) { 452 TRAP_TRACE_CTL *ctlp; 453 caddr_t httrace_buf_va; 454 455 ASSERT(cpuid < max_ncpus); 456 ctlp = &trap_trace_ctl[cpuid]; 457 httrace_buf_va = ctlp->d.hvaddr_base; 458 if (httrace_buf_va == htrap_tr0) { 459 bzero(httrace_buf_va, HTRAP_TSIZE); 460 } else if (httrace_buf_va != NULL) { 461 contig_mem_free(httrace_buf_va, HTRAP_TSIZE); 462 } 463 ctlp->d.hvaddr_base = NULL; 464 ctlp->d.hlimit = 0; 465 ctlp->d.hpaddr_base = NULL; 466 } 467 } 468 469 /* 470 * Load any required machine class (sun4v) specific drivers. 471 */ 472 void 473 load_mach_drivers(void) 474 { 475 /* 476 * We don't want to load these LDOMs-specific 477 * modules if domaining is not supported. Also, 478 * we must be able to run on non-LDOMs firmware. 479 */ 480 if (!domaining_supported()) 481 return; 482 483 /* 484 * Load the core domain services module 485 */ 486 if (modload("misc", "ds") == -1) 487 cmn_err(CE_NOTE, "!'ds' module failed to load"); 488 489 /* 490 * Load the rest of the domain services 491 */ 492 if (modload("misc", "fault_iso") == -1) 493 cmn_err(CE_NOTE, "!'fault_iso' module failed to load"); 494 495 if (modload("misc", "platsvc") == -1) 496 cmn_err(CE_NOTE, "!'platsvc' module failed to load"); 497 498 if (domaining_enabled() && modload("misc", "dr_cpu") == -1) 499 cmn_err(CE_NOTE, "!'dr_cpu' module failed to load"); 500 501 /* 502 * Attempt to attach any virtual device servers. These 503 * drivers must be loaded at start of day so that they 504 * can respond to any updates to the machine description. 505 * 506 * Since it is quite likely that a domain will not support 507 * one or more of these servers, failures are ignored. 508 */ 509 510 /* virtual disk server */ 511 (void) i_ddi_attach_hw_nodes("vds"); 512 513 /* virtual network switch */ 514 (void) i_ddi_attach_hw_nodes("vsw"); 515 516 /* virtual console concentrator */ 517 (void) i_ddi_attach_hw_nodes("vcc"); 518 } 519