1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/machsystm.h> 28 #include <sys/archsystm.h> 29 #include <sys/prom_plat.h> 30 #include <sys/promif.h> 31 #include <sys/vm.h> 32 #include <sys/cpu.h> 33 #include <sys/atomic.h> 34 #include <sys/cpupart.h> 35 #include <sys/disp.h> 36 #include <sys/hypervisor_api.h> 37 #include <sys/traptrace.h> 38 #include <sys/modctl.h> 39 #include <sys/ldoms.h> 40 #include <sys/cpu_module.h> 41 #include <sys/mutex_impl.h> 42 #include <sys/rwlock.h> 43 #include <vm/vm_dep.h> 44 #include <sys/sdt.h> 45 46 #ifdef TRAPTRACE 47 int mach_htraptrace_enable = 1; 48 #else 49 int mach_htraptrace_enable = 0; 50 #endif 51 int htrap_tr0_inuse = 0; 52 extern char htrap_tr0[]; /* prealloc buf for boot cpu */ 53 54 caddr_t mmu_fault_status_area; 55 56 extern void sfmmu_set_tsbs(void); 57 /* 58 * CPU IDLE optimization variables/routines 59 */ 60 static int enable_halt_idle_cpus = 1; 61 62 /* 63 * Defines for the idle_state_transition DTrace probe 64 * 65 * The probe fires when the CPU undergoes an idle state change (e.g. hv yield) 66 * The agument passed is the state to which the CPU is transitioning. 67 * 68 * The states are defined here. 69 */ 70 #define IDLE_STATE_NORMAL 0 71 #define IDLE_STATE_YIELDED 1 72 73 #define SUN4V_CLOCK_TICK_THRESHOLD 64 74 #define SUN4V_CLOCK_TICK_NCPUS 64 75 76 extern int clock_tick_threshold; 77 extern int clock_tick_ncpus; 78 79 void 80 setup_trap_table(void) 81 { 82 caddr_t mmfsa_va; 83 extern caddr_t mmu_fault_status_area; 84 mmfsa_va = 85 mmu_fault_status_area + (MMFSA_SIZE * CPU->cpu_id); 86 87 intr_init(CPU); /* init interrupt request free list */ 88 setwstate(WSTATE_KERN); 89 set_mmfsa_scratchpad(mmfsa_va); 90 prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va)); 91 sfmmu_set_tsbs(); 92 } 93 94 void 95 phys_install_has_changed(void) 96 { 97 98 } 99 100 /* 101 * Halt the present CPU until awoken via an interrupt 102 */ 103 static void 104 cpu_halt(void) 105 { 106 cpu_t *cpup = CPU; 107 processorid_t cpun = cpup->cpu_id; 108 cpupart_t *cp = cpup->cpu_part; 109 int hset_update = 1; 110 volatile int *p = &cpup->cpu_disp->disp_nrunnable; 111 uint_t s; 112 113 /* 114 * If this CPU is online then we should notate our halting 115 * by adding ourselves to the partition's halted CPU 116 * bitmap. This allows other CPUs to find/awaken us when 117 * work becomes available. 118 */ 119 if (CPU->cpu_flags & CPU_OFFLINE) 120 hset_update = 0; 121 122 /* 123 * Add ourselves to the partition's halted CPUs bitmask 124 * and set our HALTED flag, if necessary. 125 * 126 * When a thread becomes runnable, it is placed on the queue 127 * and then the halted cpuset is checked to determine who 128 * (if anyone) should be awoken. We therefore need to first 129 * add ourselves to the halted cpuset, and then check if there 130 * is any work available. 131 */ 132 if (hset_update) { 133 cpup->cpu_disp_flags |= CPU_DISP_HALTED; 134 membar_producer(); 135 CPUSET_ATOMIC_ADD(cp->cp_mach->mc_haltset, cpun); 136 } 137 138 /* 139 * Check to make sure there's really nothing to do. 140 * Work destined for this CPU may become available after 141 * this check. We'll be notified through the clearing of our 142 * bit in the halted CPU bitmask, and a poke. 143 */ 144 if (disp_anywork()) { 145 if (hset_update) { 146 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 147 CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun); 148 } 149 return; 150 } 151 152 /* 153 * We're on our way to being halted. Wait until something becomes 154 * runnable locally or we are awaken (i.e. removed from the halt set). 155 * Note that the call to hv_cpu_yield() can return even if we have 156 * nothing to do. 157 * 158 * Disable interrupts now, so that we'll awaken immediately 159 * after halting if someone tries to poke us between now and 160 * the time we actually halt. 161 * 162 * We check for the presence of our bit after disabling interrupts. 163 * If it's cleared, we'll return. If the bit is cleared after 164 * we check then the poke will pop us out of the halted state. 165 * Also, if the offlined CPU has been brought back on-line, then 166 * we return as well. 167 * 168 * The ordering of the poke and the clearing of the bit by cpu_wakeup 169 * is important. 170 * cpu_wakeup() must clear, then poke. 171 * cpu_halt() must disable interrupts, then check for the bit. 172 * 173 * The check for anything locally runnable is here for performance 174 * and isn't needed for correctness. disp_nrunnable ought to be 175 * in our cache still, so it's inexpensive to check, and if there 176 * is anything runnable we won't have to wait for the poke. 177 * 178 */ 179 s = disable_vec_intr(); 180 while (*p == 0 && 181 ((hset_update && CPU_IN_SET(cp->cp_mach->mc_haltset, cpun)) || 182 (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) { 183 184 DTRACE_PROBE1(idle__state__transition, 185 uint_t, IDLE_STATE_YIELDED); 186 (void) hv_cpu_yield(); 187 DTRACE_PROBE1(idle__state__transition, 188 uint_t, IDLE_STATE_NORMAL); 189 190 enable_vec_intr(s); 191 s = disable_vec_intr(); 192 } 193 194 /* 195 * We're no longer halted 196 */ 197 enable_vec_intr(s); 198 if (hset_update) { 199 cpup->cpu_disp_flags &= ~CPU_DISP_HALTED; 200 CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpun); 201 } 202 } 203 204 /* 205 * If "cpu" is halted, then wake it up clearing its halted bit in advance. 206 * Otherwise, see if other CPUs in the cpu partition are halted and need to 207 * be woken up so that they can steal the thread we placed on this CPU. 208 * This function is only used on MP systems. 209 */ 210 static void 211 cpu_wakeup(cpu_t *cpu, int bound) 212 { 213 uint_t cpu_found; 214 int result; 215 cpupart_t *cp; 216 217 cp = cpu->cpu_part; 218 if (CPU_IN_SET(cp->cp_mach->mc_haltset, cpu->cpu_id)) { 219 /* 220 * Clear the halted bit for that CPU since it will be 221 * poked in a moment. 222 */ 223 CPUSET_ATOMIC_DEL(cp->cp_mach->mc_haltset, cpu->cpu_id); 224 /* 225 * We may find the current CPU present in the halted cpuset 226 * if we're in the context of an interrupt that occurred 227 * before we had a chance to clear our bit in cpu_halt(). 228 * Poking ourself is obviously unnecessary, since if 229 * we're here, we're not halted. 230 */ 231 if (cpu != CPU) 232 poke_cpu(cpu->cpu_id); 233 return; 234 } else { 235 /* 236 * This cpu isn't halted, but it's idle or undergoing a 237 * context switch. No need to awaken anyone else. 238 */ 239 if (cpu->cpu_thread == cpu->cpu_idle_thread || 240 cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL) 241 return; 242 } 243 244 /* 245 * No need to wake up other CPUs if the thread we just enqueued 246 * is bound. 247 */ 248 if (bound) 249 return; 250 251 /* 252 * See if there's any other halted CPUs. If there are, then 253 * select one, and awaken it. 254 * It's possible that after we find a CPU, somebody else 255 * will awaken it before we get the chance. 256 * In that case, look again. 257 */ 258 do { 259 CPUSET_FIND(cp->cp_mach->mc_haltset, cpu_found); 260 if (cpu_found == CPUSET_NOTINSET) 261 return; 262 263 ASSERT(cpu_found >= 0 && cpu_found < NCPU); 264 CPUSET_ATOMIC_XDEL(cp->cp_mach->mc_haltset, cpu_found, result); 265 } while (result < 0); 266 267 if (cpu_found != CPU->cpu_id) 268 poke_cpu(cpu_found); 269 } 270 271 void 272 mach_cpu_halt_idle() 273 { 274 if (enable_halt_idle_cpus) { 275 idle_cpu = cpu_halt; 276 disp_enq_thread = cpu_wakeup; 277 } 278 } 279 280 int 281 ndata_alloc_mmfsa(struct memlist *ndata) 282 { 283 size_t size; 284 285 size = MMFSA_SIZE * max_ncpus; 286 mmu_fault_status_area = ndata_alloc(ndata, size, ecache_alignsize); 287 if (mmu_fault_status_area == NULL) 288 return (-1); 289 return (0); 290 } 291 292 void 293 mach_memscrub(void) 294 { 295 /* no memscrub support for sun4v for now */ 296 } 297 298 void 299 mach_fpras() 300 { 301 /* no fpras support for sun4v for now */ 302 } 303 304 void 305 mach_hw_copy_limit(void) 306 { 307 /* HW copy limits set by individual CPU module */ 308 } 309 310 /* 311 * We need to enable soft ring functionality on Niagara platform since 312 * one strand can't handle interrupts for a 1Gb NIC. Set the tunable 313 * ip_squeue_soft_ring by default on this platform. We can also set 314 * ip_threads_per_cpu to track number of threads per core. The variables 315 * themselves are defined in space.c and used by IP module 316 */ 317 extern uint_t ip_threads_per_cpu; 318 extern boolean_t ip_squeue_soft_ring; 319 void 320 startup_platform(void) 321 { 322 ip_squeue_soft_ring = B_TRUE; 323 if (clock_tick_threshold == 0) 324 clock_tick_threshold = SUN4V_CLOCK_TICK_THRESHOLD; 325 if (clock_tick_ncpus == 0) 326 clock_tick_ncpus = SUN4V_CLOCK_TICK_NCPUS; 327 /* set per-platform constants for mutex_backoff */ 328 mutex_backoff_base = 1; 329 mutex_cap_factor = 4; 330 if (l2_cache_node_count() > 1) { 331 /* VF for example */ 332 mutex_backoff_base = 2; 333 mutex_cap_factor = 64; 334 } 335 rw_lock_backoff = default_lock_backoff; 336 rw_lock_delay = default_lock_delay; 337 } 338 339 /* 340 * This function sets up hypervisor traptrace buffer 341 * This routine is called by the boot cpu only 342 */ 343 void 344 mach_htraptrace_setup(int cpuid) 345 { 346 TRAP_TRACE_CTL *ctlp; 347 int bootcpuid = getprocessorid(); /* invoked on boot cpu only */ 348 349 if (mach_htraptrace_enable && ((cpuid != bootcpuid) || 350 !htrap_tr0_inuse)) { 351 ctlp = &trap_trace_ctl[cpuid]; 352 ctlp->d.hvaddr_base = (cpuid == bootcpuid) ? htrap_tr0 : 353 contig_mem_alloc_align(HTRAP_TSIZE, HTRAP_TSIZE); 354 if (ctlp->d.hvaddr_base == NULL) { 355 ctlp->d.hlimit = 0; 356 ctlp->d.hpaddr_base = NULL; 357 cmn_err(CE_WARN, "!cpu%d: failed to allocate HV " 358 "traptrace buffer", cpuid); 359 } else { 360 ctlp->d.hlimit = HTRAP_TSIZE; 361 ctlp->d.hpaddr_base = va_to_pa(ctlp->d.hvaddr_base); 362 } 363 } 364 } 365 366 /* 367 * This function enables or disables the hypervisor traptracing 368 */ 369 void 370 mach_htraptrace_configure(int cpuid) 371 { 372 uint64_t ret; 373 uint64_t prev_buf, prev_bufsize; 374 uint64_t prev_enable; 375 uint64_t size; 376 TRAP_TRACE_CTL *ctlp; 377 378 ctlp = &trap_trace_ctl[cpuid]; 379 if (mach_htraptrace_enable) { 380 if ((ctlp->d.hvaddr_base != NULL) && 381 ((ctlp->d.hvaddr_base != htrap_tr0) || 382 (!htrap_tr0_inuse))) { 383 ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize); 384 if ((ret == H_EOK) && (prev_bufsize != 0)) { 385 cmn_err(CE_CONT, 386 "!cpu%d: previous HV traptrace buffer of " 387 "size 0x%lx at address 0x%lx", cpuid, 388 prev_bufsize, prev_buf); 389 } 390 391 ret = hv_ttrace_buf_conf(ctlp->d.hpaddr_base, 392 ctlp->d.hlimit / 393 (sizeof (struct htrap_trace_record)), &size); 394 if (ret == H_EOK) { 395 ret = hv_ttrace_enable(\ 396 (uint64_t)TRAP_TENABLE_ALL, &prev_enable); 397 if (ret != H_EOK) { 398 cmn_err(CE_WARN, 399 "!cpu%d: HV traptracing not " 400 "enabled, ta: 0x%x returned error: " 401 "%ld", cpuid, TTRACE_ENABLE, ret); 402 } else { 403 if (ctlp->d.hvaddr_base == htrap_tr0) 404 htrap_tr0_inuse = 1; 405 } 406 } else { 407 cmn_err(CE_WARN, 408 "!cpu%d: HV traptrace buffer not " 409 "configured, ta: 0x%x returned error: %ld", 410 cpuid, TTRACE_BUF_CONF, ret); 411 } 412 /* 413 * set hvaddr_base to NULL when traptrace buffer 414 * registration fails 415 */ 416 if (ret != H_EOK) { 417 ctlp->d.hvaddr_base = NULL; 418 ctlp->d.hlimit = 0; 419 ctlp->d.hpaddr_base = NULL; 420 } 421 } 422 } else { 423 ret = hv_ttrace_buf_info(&prev_buf, &prev_bufsize); 424 if ((ret == H_EOK) && (prev_bufsize != 0)) { 425 ret = hv_ttrace_enable((uint64_t)TRAP_TDISABLE_ALL, 426 &prev_enable); 427 if (ret == H_EOK) { 428 if (ctlp->d.hvaddr_base == htrap_tr0) 429 htrap_tr0_inuse = 0; 430 ctlp->d.hvaddr_base = NULL; 431 ctlp->d.hlimit = 0; 432 ctlp->d.hpaddr_base = NULL; 433 } else 434 cmn_err(CE_WARN, 435 "!cpu%d: HV traptracing is not disabled, " 436 "ta: 0x%x returned error: %ld", 437 cpuid, TTRACE_ENABLE, ret); 438 } 439 } 440 } 441 442 /* 443 * This function cleans up the hypervisor traptrace buffer 444 */ 445 void 446 mach_htraptrace_cleanup(int cpuid) 447 { 448 if (mach_htraptrace_enable) { 449 TRAP_TRACE_CTL *ctlp; 450 caddr_t httrace_buf_va; 451 452 ASSERT(cpuid < max_ncpus); 453 ctlp = &trap_trace_ctl[cpuid]; 454 httrace_buf_va = ctlp->d.hvaddr_base; 455 if (httrace_buf_va == htrap_tr0) { 456 bzero(httrace_buf_va, HTRAP_TSIZE); 457 } else if (httrace_buf_va != NULL) { 458 contig_mem_free(httrace_buf_va, HTRAP_TSIZE); 459 } 460 ctlp->d.hvaddr_base = NULL; 461 ctlp->d.hlimit = 0; 462 ctlp->d.hpaddr_base = NULL; 463 } 464 } 465 466 /* 467 * Load any required machine class (sun4v) specific drivers. 468 */ 469 void 470 load_mach_drivers(void) 471 { 472 /* 473 * We don't want to load these LDOMs-specific 474 * modules if domaining is not supported. Also, 475 * we must be able to run on non-LDOMs firmware. 476 */ 477 if (!domaining_supported()) 478 return; 479 480 /* 481 * Load the core domain services module 482 */ 483 if (modload("misc", "ds") == -1) 484 cmn_err(CE_NOTE, "!'ds' module failed to load"); 485 486 /* 487 * Load the rest of the domain services 488 */ 489 if (modload("misc", "fault_iso") == -1) 490 cmn_err(CE_NOTE, "!'fault_iso' module failed to load"); 491 492 if (modload("misc", "platsvc") == -1) 493 cmn_err(CE_NOTE, "!'platsvc' module failed to load"); 494 495 if (domaining_enabled() && modload("misc", "dr_cpu") == -1) 496 cmn_err(CE_NOTE, "!'dr_cpu' module failed to load"); 497 498 if (modload("misc", "dr_io") == -1) 499 cmn_err(CE_NOTE, "!'dr_io' module failed to load"); 500 501 /* 502 * Attempt to attach any virtual device servers. These 503 * drivers must be loaded at start of day so that they 504 * can respond to any updates to the machine description. 505 * 506 * Since it is quite likely that a domain will not support 507 * one or more of these servers, failures are ignored. 508 */ 509 510 /* virtual disk server */ 511 (void) i_ddi_attach_hw_nodes("vds"); 512 513 /* virtual network switch */ 514 (void) i_ddi_attach_hw_nodes("vsw"); 515 516 /* virtual console concentrator */ 517 (void) i_ddi_attach_hw_nodes("vcc"); 518 } 519