1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Architecture-independent CPU control functions. 28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/var.h> 33 #include <sys/thread.h> 34 #include <sys/cpuvar.h> 35 #include <sys/kstat.h> 36 #include <sys/uadmin.h> 37 #include <sys/systm.h> 38 #include <sys/errno.h> 39 #include <sys/cmn_err.h> 40 #include <sys/procset.h> 41 #include <sys/processor.h> 42 #include <sys/debug.h> 43 #include <sys/cpupart.h> 44 #include <sys/lgrp.h> 45 #include <sys/pset.h> 46 #include <sys/pghw.h> 47 #include <sys/kmem.h> 48 #include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */ 49 #include <sys/atomic.h> 50 #include <sys/callb.h> 51 #include <sys/vtrace.h> 52 #include <sys/cyclic.h> 53 #include <sys/bitmap.h> 54 #include <sys/nvpair.h> 55 #include <sys/pool_pset.h> 56 #include <sys/msacct.h> 57 #include <sys/time.h> 58 #include <sys/archsystm.h> 59 #if defined(__x86) || defined(__amd64) 60 #include <sys/x86_archext.h> 61 #endif 62 #include <sys/callo.h> 63 64 extern int mp_cpu_start(cpu_t *); 65 extern int mp_cpu_stop(cpu_t *); 66 extern int mp_cpu_poweron(cpu_t *); 67 extern int mp_cpu_poweroff(cpu_t *); 68 extern int mp_cpu_configure(int); 69 extern int mp_cpu_unconfigure(int); 70 extern void mp_cpu_faulted_enter(cpu_t *); 71 extern void mp_cpu_faulted_exit(cpu_t *); 72 73 extern int cmp_cpu_to_chip(processorid_t cpuid); 74 #ifdef __sparcv9 75 extern char *cpu_fru_fmri(cpu_t *cp); 76 #endif 77 78 static void cpu_add_active_internal(cpu_t *cp); 79 static void cpu_remove_active(cpu_t *cp); 80 static void cpu_info_kstat_create(cpu_t *cp); 81 static void cpu_info_kstat_destroy(cpu_t *cp); 82 static void cpu_stats_kstat_create(cpu_t *cp); 83 static void cpu_stats_kstat_destroy(cpu_t *cp); 84 85 static int cpu_sys_stats_ks_update(kstat_t *ksp, int rw); 86 static int cpu_vm_stats_ks_update(kstat_t *ksp, int rw); 87 static int cpu_stat_ks_update(kstat_t *ksp, int rw); 88 static int cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t); 89 90 /* 91 * cpu_lock protects ncpus, ncpus_online, cpu_flag, cpu_list, cpu_active, 92 * and dispatch queue reallocations. The lock ordering with respect to 93 * related locks is: 94 * 95 * cpu_lock --> thread_free_lock ---> p_lock ---> thread_lock() 96 * 97 * Warning: Certain sections of code do not use the cpu_lock when 98 * traversing the cpu_list (e.g. mutex_vector_enter(), clock()). 
Since all cpus are paused during modifications to this list, a solution
 * to protect the list is to either disable kernel preemption while
 * walking the list, *or* recheck the cpu_next pointer at each
 * iteration in the loop.  Note that in no case can any cached
 * copies of the cpu pointers be kept as they may become invalid.
 */
kmutex_t	cpu_lock;
cpu_t		*cpu_list;		/* list of all CPUs */
cpu_t		*clock_cpu_list;	/* used by clock to walk CPUs */
cpu_t		*cpu_active;		/* list of active CPUs */
static cpuset_t	cpu_available;		/* set of available CPUs */
cpuset_t	cpu_seqid_inuse;	/* which cpu_seqids are in use */

cpu_t		**cpu_seq;		/* ptrs to CPUs, indexed by seq_id */

/*
 * max_ncpus keeps the max cpus the system can have. Initially
 * it's NCPU, but since most archs scan the devtree for cpus
 * fairly early on during boot, the real max can be known before
 * ncpus is set (useful for early NCPU based allocations).
 */
int max_ncpus = NCPU;
/*
 * platforms that set max_ncpus to the maximum number of cpus that can be
 * dynamically added will set boot_max_ncpus to the number of cpus found
 * at device tree scan time during boot.
 */
int boot_max_ncpus = -1;
int boot_ncpus = -1;
/*
 * Maximum possible CPU id.  This can never be >= NCPU since NCPU is
 * used to size arrays that are indexed by CPU id.
 */
processorid_t max_cpuid = NCPU - 1;

int ncpus = 1;
int ncpus_online = 1;

/*
 * CPU that we're trying to offline.  Protected by cpu_lock.
 */
cpu_t *cpu_inmotion;

/*
 * Can be raised to suppress further weakbinding, which is instead
 * satisfied by disabling preemption.  Must be raised/lowered under cpu_lock,
 * while individual thread weakbinding synchronisation is done under thread
 * lock.
 */
int weakbindingbarrier;

/*
 * Variables used in pause_cpus().
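 *
 * safe_list[] records the pause handshake state of each CPU's pause
 * thread (PAUSE_IDLE, PAUSE_READY, PAUSE_WAIT, PAUSE_DIE, PAUSE_DEAD),
 * driven by pause_cpus()/start_cpus(), cpu_pause(), cpu_pause_free() and
 * the machine-dependent mach_cpu_pause() below.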
152 */ 153 static volatile char safe_list[NCPU]; 154 155 static struct _cpu_pause_info { 156 int cp_spl; /* spl saved in pause_cpus() */ 157 volatile int cp_go; /* Go signal sent after all ready */ 158 int cp_count; /* # of CPUs to pause */ 159 ksema_t cp_sem; /* synch pause_cpus & cpu_pause */ 160 kthread_id_t cp_paused; 161 } cpu_pause_info; 162 163 static kmutex_t pause_free_mutex; 164 static kcondvar_t pause_free_cv; 165 166 void *(*cpu_pause_func)(void *) = NULL; 167 168 169 static struct cpu_sys_stats_ks_data { 170 kstat_named_t cpu_ticks_idle; 171 kstat_named_t cpu_ticks_user; 172 kstat_named_t cpu_ticks_kernel; 173 kstat_named_t cpu_ticks_wait; 174 kstat_named_t cpu_nsec_idle; 175 kstat_named_t cpu_nsec_user; 176 kstat_named_t cpu_nsec_kernel; 177 kstat_named_t cpu_nsec_intr; 178 kstat_named_t cpu_load_intr; 179 kstat_named_t wait_ticks_io; 180 kstat_named_t bread; 181 kstat_named_t bwrite; 182 kstat_named_t lread; 183 kstat_named_t lwrite; 184 kstat_named_t phread; 185 kstat_named_t phwrite; 186 kstat_named_t pswitch; 187 kstat_named_t trap; 188 kstat_named_t intr; 189 kstat_named_t syscall; 190 kstat_named_t sysread; 191 kstat_named_t syswrite; 192 kstat_named_t sysfork; 193 kstat_named_t sysvfork; 194 kstat_named_t sysexec; 195 kstat_named_t readch; 196 kstat_named_t writech; 197 kstat_named_t rcvint; 198 kstat_named_t xmtint; 199 kstat_named_t mdmint; 200 kstat_named_t rawch; 201 kstat_named_t canch; 202 kstat_named_t outch; 203 kstat_named_t msg; 204 kstat_named_t sema; 205 kstat_named_t namei; 206 kstat_named_t ufsiget; 207 kstat_named_t ufsdirblk; 208 kstat_named_t ufsipage; 209 kstat_named_t ufsinopage; 210 kstat_named_t procovf; 211 kstat_named_t intrthread; 212 kstat_named_t intrblk; 213 kstat_named_t intrunpin; 214 kstat_named_t idlethread; 215 kstat_named_t inv_swtch; 216 kstat_named_t nthreads; 217 kstat_named_t cpumigrate; 218 kstat_named_t xcalls; 219 kstat_named_t mutex_adenters; 220 kstat_named_t rw_rdfails; 221 kstat_named_t rw_wrfails; 222 kstat_named_t modload; 223 kstat_named_t modunload; 224 kstat_named_t bawrite; 225 kstat_named_t iowait; 226 } cpu_sys_stats_ks_data_template = { 227 { "cpu_ticks_idle", KSTAT_DATA_UINT64 }, 228 { "cpu_ticks_user", KSTAT_DATA_UINT64 }, 229 { "cpu_ticks_kernel", KSTAT_DATA_UINT64 }, 230 { "cpu_ticks_wait", KSTAT_DATA_UINT64 }, 231 { "cpu_nsec_idle", KSTAT_DATA_UINT64 }, 232 { "cpu_nsec_user", KSTAT_DATA_UINT64 }, 233 { "cpu_nsec_kernel", KSTAT_DATA_UINT64 }, 234 { "cpu_nsec_intr", KSTAT_DATA_UINT64 }, 235 { "cpu_load_intr", KSTAT_DATA_UINT64 }, 236 { "wait_ticks_io", KSTAT_DATA_UINT64 }, 237 { "bread", KSTAT_DATA_UINT64 }, 238 { "bwrite", KSTAT_DATA_UINT64 }, 239 { "lread", KSTAT_DATA_UINT64 }, 240 { "lwrite", KSTAT_DATA_UINT64 }, 241 { "phread", KSTAT_DATA_UINT64 }, 242 { "phwrite", KSTAT_DATA_UINT64 }, 243 { "pswitch", KSTAT_DATA_UINT64 }, 244 { "trap", KSTAT_DATA_UINT64 }, 245 { "intr", KSTAT_DATA_UINT64 }, 246 { "syscall", KSTAT_DATA_UINT64 }, 247 { "sysread", KSTAT_DATA_UINT64 }, 248 { "syswrite", KSTAT_DATA_UINT64 }, 249 { "sysfork", KSTAT_DATA_UINT64 }, 250 { "sysvfork", KSTAT_DATA_UINT64 }, 251 { "sysexec", KSTAT_DATA_UINT64 }, 252 { "readch", KSTAT_DATA_UINT64 }, 253 { "writech", KSTAT_DATA_UINT64 }, 254 { "rcvint", KSTAT_DATA_UINT64 }, 255 { "xmtint", KSTAT_DATA_UINT64 }, 256 { "mdmint", KSTAT_DATA_UINT64 }, 257 { "rawch", KSTAT_DATA_UINT64 }, 258 { "canch", KSTAT_DATA_UINT64 }, 259 { "outch", KSTAT_DATA_UINT64 }, 260 { "msg", KSTAT_DATA_UINT64 }, 261 { "sema", KSTAT_DATA_UINT64 }, 262 { "namei", KSTAT_DATA_UINT64 }, 
263 { "ufsiget", KSTAT_DATA_UINT64 }, 264 { "ufsdirblk", KSTAT_DATA_UINT64 }, 265 { "ufsipage", KSTAT_DATA_UINT64 }, 266 { "ufsinopage", KSTAT_DATA_UINT64 }, 267 { "procovf", KSTAT_DATA_UINT64 }, 268 { "intrthread", KSTAT_DATA_UINT64 }, 269 { "intrblk", KSTAT_DATA_UINT64 }, 270 { "intrunpin", KSTAT_DATA_UINT64 }, 271 { "idlethread", KSTAT_DATA_UINT64 }, 272 { "inv_swtch", KSTAT_DATA_UINT64 }, 273 { "nthreads", KSTAT_DATA_UINT64 }, 274 { "cpumigrate", KSTAT_DATA_UINT64 }, 275 { "xcalls", KSTAT_DATA_UINT64 }, 276 { "mutex_adenters", KSTAT_DATA_UINT64 }, 277 { "rw_rdfails", KSTAT_DATA_UINT64 }, 278 { "rw_wrfails", KSTAT_DATA_UINT64 }, 279 { "modload", KSTAT_DATA_UINT64 }, 280 { "modunload", KSTAT_DATA_UINT64 }, 281 { "bawrite", KSTAT_DATA_UINT64 }, 282 { "iowait", KSTAT_DATA_UINT64 }, 283 }; 284 285 static struct cpu_vm_stats_ks_data { 286 kstat_named_t pgrec; 287 kstat_named_t pgfrec; 288 kstat_named_t pgin; 289 kstat_named_t pgpgin; 290 kstat_named_t pgout; 291 kstat_named_t pgpgout; 292 kstat_named_t swapin; 293 kstat_named_t pgswapin; 294 kstat_named_t swapout; 295 kstat_named_t pgswapout; 296 kstat_named_t zfod; 297 kstat_named_t dfree; 298 kstat_named_t scan; 299 kstat_named_t rev; 300 kstat_named_t hat_fault; 301 kstat_named_t as_fault; 302 kstat_named_t maj_fault; 303 kstat_named_t cow_fault; 304 kstat_named_t prot_fault; 305 kstat_named_t softlock; 306 kstat_named_t kernel_asflt; 307 kstat_named_t pgrrun; 308 kstat_named_t execpgin; 309 kstat_named_t execpgout; 310 kstat_named_t execfree; 311 kstat_named_t anonpgin; 312 kstat_named_t anonpgout; 313 kstat_named_t anonfree; 314 kstat_named_t fspgin; 315 kstat_named_t fspgout; 316 kstat_named_t fsfree; 317 } cpu_vm_stats_ks_data_template = { 318 { "pgrec", KSTAT_DATA_UINT64 }, 319 { "pgfrec", KSTAT_DATA_UINT64 }, 320 { "pgin", KSTAT_DATA_UINT64 }, 321 { "pgpgin", KSTAT_DATA_UINT64 }, 322 { "pgout", KSTAT_DATA_UINT64 }, 323 { "pgpgout", KSTAT_DATA_UINT64 }, 324 { "swapin", KSTAT_DATA_UINT64 }, 325 { "pgswapin", KSTAT_DATA_UINT64 }, 326 { "swapout", KSTAT_DATA_UINT64 }, 327 { "pgswapout", KSTAT_DATA_UINT64 }, 328 { "zfod", KSTAT_DATA_UINT64 }, 329 { "dfree", KSTAT_DATA_UINT64 }, 330 { "scan", KSTAT_DATA_UINT64 }, 331 { "rev", KSTAT_DATA_UINT64 }, 332 { "hat_fault", KSTAT_DATA_UINT64 }, 333 { "as_fault", KSTAT_DATA_UINT64 }, 334 { "maj_fault", KSTAT_DATA_UINT64 }, 335 { "cow_fault", KSTAT_DATA_UINT64 }, 336 { "prot_fault", KSTAT_DATA_UINT64 }, 337 { "softlock", KSTAT_DATA_UINT64 }, 338 { "kernel_asflt", KSTAT_DATA_UINT64 }, 339 { "pgrrun", KSTAT_DATA_UINT64 }, 340 { "execpgin", KSTAT_DATA_UINT64 }, 341 { "execpgout", KSTAT_DATA_UINT64 }, 342 { "execfree", KSTAT_DATA_UINT64 }, 343 { "anonpgin", KSTAT_DATA_UINT64 }, 344 { "anonpgout", KSTAT_DATA_UINT64 }, 345 { "anonfree", KSTAT_DATA_UINT64 }, 346 { "fspgin", KSTAT_DATA_UINT64 }, 347 { "fspgout", KSTAT_DATA_UINT64 }, 348 { "fsfree", KSTAT_DATA_UINT64 }, 349 }; 350 351 /* 352 * Force the specified thread to migrate to the appropriate processor. 353 * Called with thread lock held, returns with it dropped. 354 */ 355 static void 356 force_thread_migrate(kthread_id_t tp) 357 { 358 ASSERT(THREAD_LOCK_HELD(tp)); 359 if (tp == curthread) { 360 THREAD_TRANSITION(tp); 361 CL_SETRUN(tp); 362 thread_unlock_nopreempt(tp); 363 swtch(); 364 } else { 365 if (tp->t_state == TS_ONPROC) { 366 cpu_surrender(tp); 367 } else if (tp->t_state == TS_RUN) { 368 (void) dispdeq(tp); 369 setbackdq(tp); 370 } 371 thread_unlock(tp); 372 } 373 } 374 375 /* 376 * Set affinity for a specified CPU. 
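 * A typical (illustrative) use is to pin the caller while it operates on
 * per-CPU state, for example:
 *
 *	thread_affinity_set(curthread, CPU_CURRENT);
 *	... code that must keep executing on this CPU ...
 *	thread_affinity_clear(curthread);
 *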
377 * A reference count is incremented and the affinity is held until the 378 * reference count is decremented to zero by thread_affinity_clear(). 379 * This is so regions of code requiring affinity can be nested. 380 * Caller needs to ensure that cpu_id remains valid, which can be 381 * done by holding cpu_lock across this call, unless the caller 382 * specifies CPU_CURRENT in which case the cpu_lock will be acquired 383 * by thread_affinity_set and CPU->cpu_id will be the target CPU. 384 */ 385 void 386 thread_affinity_set(kthread_id_t t, int cpu_id) 387 { 388 cpu_t *cp; 389 int c; 390 391 ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL)); 392 393 if ((c = cpu_id) == CPU_CURRENT) { 394 mutex_enter(&cpu_lock); 395 cpu_id = CPU->cpu_id; 396 } 397 /* 398 * We should be asserting that cpu_lock is held here, but 399 * the NCA code doesn't acquire it. The following assert 400 * should be uncommented when the NCA code is fixed. 401 * 402 * ASSERT(MUTEX_HELD(&cpu_lock)); 403 */ 404 ASSERT((cpu_id >= 0) && (cpu_id < NCPU)); 405 cp = cpu[cpu_id]; 406 ASSERT(cp != NULL); /* user must provide a good cpu_id */ 407 /* 408 * If there is already a hard affinity requested, and this affinity 409 * conflicts with that, panic. 410 */ 411 thread_lock(t); 412 if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) { 413 panic("affinity_set: setting %p but already bound to %p", 414 (void *)cp, (void *)t->t_bound_cpu); 415 } 416 t->t_affinitycnt++; 417 t->t_bound_cpu = cp; 418 419 /* 420 * Make sure we're running on the right CPU. 421 */ 422 if (cp != t->t_cpu || t != curthread) { 423 force_thread_migrate(t); /* drops thread lock */ 424 } else { 425 thread_unlock(t); 426 } 427 428 if (c == CPU_CURRENT) 429 mutex_exit(&cpu_lock); 430 } 431 432 /* 433 * Wrapper for backward compatibility. 434 */ 435 void 436 affinity_set(int cpu_id) 437 { 438 thread_affinity_set(curthread, cpu_id); 439 } 440 441 /* 442 * Decrement the affinity reservation count and if it becomes zero, 443 * clear the CPU affinity for the current thread, or set it to the user's 444 * software binding request. 445 */ 446 void 447 thread_affinity_clear(kthread_id_t t) 448 { 449 register processorid_t binding; 450 451 thread_lock(t); 452 if (--t->t_affinitycnt == 0) { 453 if ((binding = t->t_bind_cpu) == PBIND_NONE) { 454 /* 455 * Adjust disp_max_unbound_pri if necessary. 456 */ 457 disp_adjust_unbound_pri(t); 458 t->t_bound_cpu = NULL; 459 if (t->t_cpu->cpu_part != t->t_cpupart) { 460 force_thread_migrate(t); 461 return; 462 } 463 } else { 464 t->t_bound_cpu = cpu[binding]; 465 /* 466 * Make sure the thread is running on the bound CPU. 467 */ 468 if (t->t_cpu != t->t_bound_cpu) { 469 force_thread_migrate(t); 470 return; /* already dropped lock */ 471 } 472 } 473 } 474 thread_unlock(t); 475 } 476 477 /* 478 * Wrapper for backward compatibility. 479 */ 480 void 481 affinity_clear(void) 482 { 483 thread_affinity_clear(curthread); 484 } 485 486 /* 487 * Weak cpu affinity. Bind to the "current" cpu for short periods 488 * of time during which the thread must not block (but may be preempted). 489 * Use this instead of kpreempt_disable() when it is only "no migration" 490 * rather than "no preemption" semantics that are required - disabling 491 * preemption holds higher priority threads off of cpu and if the 492 * operation that is protected is more than momentary this is not good 493 * for realtime etc. 
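 *
 * An illustrative pattern (hypothetical caller):
 *
 *	thread_nomigrate();
 *	... work on this CPU's private data; preemption may still occur ...
 *	thread_allowmigrate();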
494 * 495 * Weakly bound threads will not prevent a cpu from being offlined - 496 * we'll only run them on the cpu to which they are weakly bound but 497 * (because they do not block) we'll always be able to move them on to 498 * another cpu at offline time if we give them just a short moment to 499 * run during which they will unbind. To give a cpu a chance of offlining, 500 * however, we require a barrier to weak bindings that may be raised for a 501 * given cpu (offline/move code may set this and then wait a short time for 502 * existing weak bindings to drop); the cpu_inmotion pointer is that barrier. 503 * 504 * There are few restrictions on the calling context of thread_nomigrate. 505 * The caller must not hold the thread lock. Calls may be nested. 506 * 507 * After weakbinding a thread must not perform actions that may block. 508 * In particular it must not call thread_affinity_set; calling that when 509 * already weakbound is nonsensical anyway. 510 * 511 * If curthread is prevented from migrating for other reasons 512 * (kernel preemption disabled; high pil; strongly bound; interrupt thread) 513 * then the weak binding will succeed even if this cpu is the target of an 514 * offline/move request. 515 */ 516 void 517 thread_nomigrate(void) 518 { 519 cpu_t *cp; 520 kthread_id_t t = curthread; 521 522 again: 523 kpreempt_disable(); 524 cp = CPU; 525 526 /* 527 * A highlevel interrupt must not modify t_nomigrate or 528 * t_weakbound_cpu of the thread it has interrupted. A lowlevel 529 * interrupt thread cannot migrate and we can avoid the 530 * thread_lock call below by short-circuiting here. In either 531 * case we can just return since no migration is possible and 532 * the condition will persist (ie, when we test for these again 533 * in thread_allowmigrate they can't have changed). Migration 534 * is also impossible if we're at or above DISP_LEVEL pil. 535 */ 536 if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD || 537 getpil() >= DISP_LEVEL) { 538 kpreempt_enable(); 539 return; 540 } 541 542 /* 543 * We must be consistent with existing weak bindings. Since we 544 * may be interrupted between the increment of t_nomigrate and 545 * the store to t_weakbound_cpu below we cannot assume that 546 * t_weakbound_cpu will be set if t_nomigrate is. Note that we 547 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not 548 * always the case. 549 */ 550 if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) { 551 if (!panicstr) 552 panic("thread_nomigrate: binding to %p but already " 553 "bound to %p", (void *)cp, 554 (void *)t->t_weakbound_cpu); 555 } 556 557 /* 558 * At this point we have preemption disabled and we don't yet hold 559 * the thread lock. So it's possible that somebody else could 560 * set t_bind_cpu here and not be able to force us across to the 561 * new cpu (since we have preemption disabled). 562 */ 563 thread_lock(curthread); 564 565 /* 566 * If further weak bindings are being (temporarily) suppressed then 567 * we'll settle for disabling kernel preemption (which assures 568 * no migration provided the thread does not block which it is 569 * not allowed to if using thread_nomigrate). We must remember 570 * this disposition so we can take appropriate action in 571 * thread_allowmigrate. If this is a nested call and the 572 * thread is already weakbound then fall through as normal. 573 * We remember the decision to settle for kpreempt_disable through 574 * negative nesting counting in t_nomigrate. 
Once a thread has had one
	 * weakbinding request satisfied in this way any further (nested)
	 * requests will continue to be satisfied in the same way,
	 * even if weak bindings have recommenced.
	 */
	if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) {
		--t->t_nomigrate;
		thread_unlock(curthread);
		return;		/* with kpreempt_disable still active */
	}

	/*
	 * We hold thread_lock so t_bind_cpu cannot change.  We could,
	 * however, be running on a cpu other than the one we are
	 * t_bound_cpu to (as explained above).  If we grant the weak
	 * binding request in that case then the dispatcher must favour our
	 * weak binding over our strong (in which case, just as when
	 * preemption is disabled, we can continue to run on a cpu other
	 * than the one to which we are strongbound; the difference in this
	 * case is that this thread can be preempted and so can appear on
	 * the dispatch queues of a cpu other than the one it is strongbound
	 * to).
	 *
	 * If the cpu we are running on does not appear to be a current
	 * offline target (we check cpu_inmotion to determine this - since
	 * we don't hold cpu_lock we may not see a recent store to that,
	 * so it's possible that we at times can grant a weak binding to a
	 * cpu that is an offline target, but that one request will not
	 * prevent the offline from succeeding) then we will always grant
	 * the weak binding request.  This includes the case above where
	 * we grant a weakbinding not commensurate with our strong binding.
	 *
	 * If our cpu does appear to be an offline target then we're inclined
	 * not to grant the weakbinding request just yet - we'd prefer to
	 * migrate to another cpu and grant the request there.  The
	 * exceptions are those cases where going through preemption code
	 * will not result in us changing cpu:
	 *
	 *	. interrupts have already bypassed this case (see above)
	 *	. we are already weakbound to this cpu (dispatcher code will
	 *	  always return us to the weakbound cpu)
	 *	. preemption was disabled even before we disabled it above
	 *	. we are strongbound to this cpu (if we're strongbound to
	 *	  another and not yet running there the trip through the
	 *	  dispatcher will move us to the strongbound cpu and we
	 *	  will grant the weak binding there)
	 */
	if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 ||
	    t->t_bound_cpu == cp) {
		/*
		 * Don't be tempted to store to t_weakbound_cpu only on
		 * the first nested bind request - if we're interrupted
		 * after the increment of t_nomigrate and before the
		 * store to t_weakbound_cpu and the interrupt calls
		 * thread_nomigrate then the assertion in thread_allowmigrate
		 * would fail.
		 */
		t->t_nomigrate++;
		t->t_weakbound_cpu = cp;
		membar_producer();
		thread_unlock(curthread);
		/*
		 * Now that we have dropped the thread_lock another thread
		 * can set our t_weakbound_cpu, and will try to migrate us
		 * to the strongbound cpu (which will not be prevented by
		 * preemption being disabled since we're about to enable
		 * preemption).  We have granted the weakbinding to the current
		 * cpu, so again we are in the position that it is possible
		 * that our weak and strong bindings differ.  Again this
		 * is catered for by dispatcher code which will favour our
		 * weak binding.
644 */ 645 kpreempt_enable(); 646 } else { 647 /* 648 * Move to another cpu before granting the request by 649 * forcing this thread through preemption code. When we 650 * get to set{front,back}dq called from CL_PREEMPT() 651 * cpu_choose() will be used to select a cpu to queue 652 * us on - that will see cpu_inmotion and take 653 * steps to avoid returning us to this cpu. 654 */ 655 cp->cpu_kprunrun = 1; 656 thread_unlock(curthread); 657 kpreempt_enable(); /* will call preempt() */ 658 goto again; 659 } 660 } 661 662 void 663 thread_allowmigrate(void) 664 { 665 kthread_id_t t = curthread; 666 667 ASSERT(t->t_weakbound_cpu == CPU || 668 (t->t_nomigrate < 0 && t->t_preempt > 0) || 669 CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD || 670 getpil() >= DISP_LEVEL); 671 672 if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) || 673 getpil() >= DISP_LEVEL) 674 return; 675 676 if (t->t_nomigrate < 0) { 677 /* 678 * This thread was granted "weak binding" in the 679 * stronger form of kernel preemption disabling. 680 * Undo a level of nesting for both t_nomigrate 681 * and t_preempt. 682 */ 683 ++t->t_nomigrate; 684 kpreempt_enable(); 685 } else if (--t->t_nomigrate == 0) { 686 /* 687 * Time to drop the weak binding. We need to cater 688 * for the case where we're weakbound to a different 689 * cpu than that to which we're strongbound (a very 690 * temporary arrangement that must only persist until 691 * weak binding drops). We don't acquire thread_lock 692 * here so even as this code executes t_bound_cpu 693 * may be changing. So we disable preemption and 694 * a) in the case that t_bound_cpu changes while we 695 * have preemption disabled kprunrun will be set 696 * asynchronously, and b) if before disabling 697 * preemption we were already on a different cpu to 698 * our t_bound_cpu then we set kprunrun ourselves 699 * to force a trip through the dispatcher when 700 * preemption is enabled. 701 */ 702 kpreempt_disable(); 703 if (t->t_bound_cpu && 704 t->t_weakbound_cpu != t->t_bound_cpu) 705 CPU->cpu_kprunrun = 1; 706 t->t_weakbound_cpu = NULL; 707 membar_producer(); 708 kpreempt_enable(); 709 } 710 } 711 712 /* 713 * weakbinding_stop can be used to temporarily cause weakbindings made 714 * with thread_nomigrate to be satisfied through the stronger action of 715 * kpreempt_disable. weakbinding_start recommences normal weakbinding. 716 */ 717 718 void 719 weakbinding_stop(void) 720 { 721 ASSERT(MUTEX_HELD(&cpu_lock)); 722 weakbindingbarrier = 1; 723 membar_producer(); /* make visible before subsequent thread_lock */ 724 } 725 726 void 727 weakbinding_start(void) 728 { 729 ASSERT(MUTEX_HELD(&cpu_lock)); 730 weakbindingbarrier = 0; 731 } 732 733 void 734 null_xcall(void) 735 { 736 } 737 738 /* 739 * This routine is called to place the CPUs in a safe place so that 740 * one of them can be taken off line or placed on line. What we are 741 * trying to do here is prevent a thread from traversing the list 742 * of active CPUs while we are changing it or from getting placed on 743 * the run queue of a CPU that has just gone off line. We do this by 744 * creating a thread with the highest possible prio for each CPU and 745 * having it call this routine. The advantage of this method is that 746 * we can eliminate all checks for CPU_ACTIVE in the disp routines. 747 * This makes disp faster at the expense of making p_online() slower 748 * which is a good trade off. 
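 *
 * In outline, the handshake implemented below: each pause thread marks
 * its safe_list[] slot PAUSE_READY and posts cp_sem; pause_cpus() counts
 * the posts and then sets cp_go; each pause thread raises spl and spins
 * in the machine-dependent mach_cpu_pause() until start_cpus() releases
 * it by resetting its slot to PAUSE_IDLE.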
 */
static void
cpu_pause(int index)
{
	int s;
	struct _cpu_pause_info *cpi = &cpu_pause_info;
	volatile char *safe = &safe_list[index];
	long    lindex = index;

	ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE));

	while (*safe != PAUSE_DIE) {
		*safe = PAUSE_READY;
		membar_enter();		/* make sure stores are flushed */
		sema_v(&cpi->cp_sem);	/* signal requesting thread */

		/*
		 * Wait here until all pause threads are running.  That
		 * indicates that it's safe to do the spl.  Until
		 * cpu_pause_info.cp_go is set, we don't want to spl
		 * because that might block clock interrupts needed
		 * to preempt threads on other CPUs.
		 */
		while (cpi->cp_go == 0)
			;
		/*
		 * Even though we are at the highest disp prio, we need
		 * to block out all interrupts below LOCK_LEVEL so that
		 * an intr doesn't come in, wake up a thread, and call
		 * setbackdq/setfrontdq.
		 */
		s = splhigh();
		/*
		 * if cpu_pause_func() has been set then call it using
		 * index as the argument, currently only used by
		 * cpr_suspend_cpus().  This function is used as the
		 * code to execute on the "paused" cpus when a machine
		 * comes out of a sleep state and CPUs were powered off.
		 * (could also be used for hotplugging CPUs).
		 */
		if (cpu_pause_func != NULL)
			(*cpu_pause_func)((void *)lindex);

		mach_cpu_pause(safe);

		splx(s);
		/*
		 * Waiting is at an end. Switch out of cpu_pause
		 * loop and resume useful work.
		 */
		swtch();
	}

	mutex_enter(&pause_free_mutex);
	*safe = PAUSE_DEAD;
	cv_broadcast(&pause_free_cv);
	mutex_exit(&pause_free_mutex);
}

/*
 * Allow the cpus to start running again.
 */
void
start_cpus()
{
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_pause_info.cp_paused);
	cpu_pause_info.cp_paused = NULL;
	for (i = 0; i < NCPU; i++)
		safe_list[i] = PAUSE_IDLE;
	membar_enter();			/* make sure stores are flushed */
	affinity_clear();
	splx(cpu_pause_info.cp_spl);
	kpreempt_enable();
}

/*
 * Allocate a pause thread for a CPU.
 */
static void
cpu_pause_alloc(cpu_t *cp)
{
	kthread_id_t	t;
	long		cpun = cp->cpu_id;

	/*
	 * Note, v.v_nglobpris will not change value as long as I hold
	 * cpu_lock.
	 */
	t = thread_create(NULL, 0, cpu_pause, (void *)cpun,
	    0, &p0, TS_STOPPED, v.v_nglobpris - 1);
	thread_lock(t);
	t->t_bound_cpu = cp;
	t->t_disp_queue = cp->cpu_disp;
	t->t_affinitycnt = 1;
	t->t_preempt = 1;
	thread_unlock(t);
	cp->cpu_pause_thread = t;
	/*
	 * Registering a thread in the callback table is usually done
	 * in the initialization code of the thread.  In this
	 * case, we do it right after thread creation because the
	 * thread itself may never run, and we need to register the
	 * fact that it is safe for cpr suspend.
	 */
	CALLB_CPR_INIT_SAFE(t, "cpu_pause");
}

/*
 * Free a pause thread for a CPU.
 */
static void
cpu_pause_free(cpu_t *cp)
{
	kthread_id_t	t;
	int		cpun = cp->cpu_id;

	ASSERT(MUTEX_HELD(&cpu_lock));
	/*
	 * We have to get the thread and tell it to die.
	 */
	if ((t = cp->cpu_pause_thread) == NULL) {
		ASSERT(safe_list[cpun] == PAUSE_IDLE);
		return;
	}
	thread_lock(t);
	t->t_cpu = CPU;		/* disp gets upset if last cpu is quiesced. */
	t->t_bound_cpu = NULL;	/* Must un-bind; cpu may not be running.
*/ 879 t->t_pri = v.v_nglobpris - 1; 880 ASSERT(safe_list[cpun] == PAUSE_IDLE); 881 safe_list[cpun] = PAUSE_DIE; 882 THREAD_TRANSITION(t); 883 setbackdq(t); 884 thread_unlock_nopreempt(t); 885 886 /* 887 * If we don't wait for the thread to actually die, it may try to 888 * run on the wrong cpu as part of an actual call to pause_cpus(). 889 */ 890 mutex_enter(&pause_free_mutex); 891 while (safe_list[cpun] != PAUSE_DEAD) { 892 cv_wait(&pause_free_cv, &pause_free_mutex); 893 } 894 mutex_exit(&pause_free_mutex); 895 safe_list[cpun] = PAUSE_IDLE; 896 897 cp->cpu_pause_thread = NULL; 898 } 899 900 /* 901 * Initialize basic structures for pausing CPUs. 902 */ 903 void 904 cpu_pause_init() 905 { 906 sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL); 907 /* 908 * Create initial CPU pause thread. 909 */ 910 cpu_pause_alloc(CPU); 911 } 912 913 /* 914 * Start the threads used to pause another CPU. 915 */ 916 static int 917 cpu_pause_start(processorid_t cpu_id) 918 { 919 int i; 920 int cpu_count = 0; 921 922 for (i = 0; i < NCPU; i++) { 923 cpu_t *cp; 924 kthread_id_t t; 925 926 cp = cpu[i]; 927 if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) { 928 safe_list[i] = PAUSE_WAIT; 929 continue; 930 } 931 932 /* 933 * Skip CPU if it is quiesced or not yet started. 934 */ 935 if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) != CPU_READY) { 936 safe_list[i] = PAUSE_WAIT; 937 continue; 938 } 939 940 /* 941 * Start this CPU's pause thread. 942 */ 943 t = cp->cpu_pause_thread; 944 thread_lock(t); 945 /* 946 * Reset the priority, since nglobpris may have 947 * changed since the thread was created, if someone 948 * has loaded the RT (or some other) scheduling 949 * class. 950 */ 951 t->t_pri = v.v_nglobpris - 1; 952 THREAD_TRANSITION(t); 953 setbackdq(t); 954 thread_unlock_nopreempt(t); 955 ++cpu_count; 956 } 957 return (cpu_count); 958 } 959 960 961 /* 962 * Pause all of the CPUs except the one we are on by creating a high 963 * priority thread bound to those CPUs. 964 * 965 * Note that one must be extremely careful regarding code 966 * executed while CPUs are paused. Since a CPU may be paused 967 * while a thread scheduling on that CPU is holding an adaptive 968 * lock, code executed with CPUs paused must not acquire adaptive 969 * (or low-level spin) locks. Also, such code must not block, 970 * since the thread that is supposed to initiate the wakeup may 971 * never run. 972 * 973 * With a few exceptions, the restrictions on code executed with CPUs 974 * paused match those for code executed at high-level interrupt 975 * context. 976 */ 977 void 978 pause_cpus(cpu_t *off_cp) 979 { 980 processorid_t cpu_id; 981 int i; 982 struct _cpu_pause_info *cpi = &cpu_pause_info; 983 984 ASSERT(MUTEX_HELD(&cpu_lock)); 985 ASSERT(cpi->cp_paused == NULL); 986 cpi->cp_count = 0; 987 cpi->cp_go = 0; 988 for (i = 0; i < NCPU; i++) 989 safe_list[i] = PAUSE_IDLE; 990 kpreempt_disable(); 991 992 /* 993 * If running on the cpu that is going offline, get off it. 994 * This is so that it won't be necessary to rechoose a CPU 995 * when done. 996 */ 997 if (CPU == off_cp) 998 cpu_id = off_cp->cpu_next_part->cpu_id; 999 else 1000 cpu_id = CPU->cpu_id; 1001 affinity_set(cpu_id); 1002 1003 /* 1004 * Start the pause threads and record how many were started 1005 */ 1006 cpi->cp_count = cpu_pause_start(cpu_id); 1007 1008 /* 1009 * Now wait for all CPUs to be running the pause thread. 
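	 * Each pause thread posts cp_sem exactly once as it starts, so the
	 * loop below consumes one post per started thread, using
	 * sema_tryp() so that this thread never blocks while doing so.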
1010 */ 1011 while (cpi->cp_count > 0) { 1012 /* 1013 * Spin reading the count without grabbing the disp 1014 * lock to make sure we don't prevent the pause 1015 * threads from getting the lock. 1016 */ 1017 while (sema_held(&cpi->cp_sem)) 1018 ; 1019 if (sema_tryp(&cpi->cp_sem)) 1020 --cpi->cp_count; 1021 } 1022 cpi->cp_go = 1; /* all have reached cpu_pause */ 1023 1024 /* 1025 * Now wait for all CPUs to spl. (Transition from PAUSE_READY 1026 * to PAUSE_WAIT.) 1027 */ 1028 for (i = 0; i < NCPU; i++) { 1029 while (safe_list[i] != PAUSE_WAIT) 1030 ; 1031 } 1032 cpi->cp_spl = splhigh(); /* block dispatcher on this CPU */ 1033 cpi->cp_paused = curthread; 1034 } 1035 1036 /* 1037 * Check whether the current thread has CPUs paused 1038 */ 1039 int 1040 cpus_paused(void) 1041 { 1042 if (cpu_pause_info.cp_paused != NULL) { 1043 ASSERT(cpu_pause_info.cp_paused == curthread); 1044 return (1); 1045 } 1046 return (0); 1047 } 1048 1049 static cpu_t * 1050 cpu_get_all(processorid_t cpun) 1051 { 1052 ASSERT(MUTEX_HELD(&cpu_lock)); 1053 1054 if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun)) 1055 return (NULL); 1056 return (cpu[cpun]); 1057 } 1058 1059 /* 1060 * Check whether cpun is a valid processor id and whether it should be 1061 * visible from the current zone. If it is, return a pointer to the 1062 * associated CPU structure. 1063 */ 1064 cpu_t * 1065 cpu_get(processorid_t cpun) 1066 { 1067 cpu_t *c; 1068 1069 ASSERT(MUTEX_HELD(&cpu_lock)); 1070 c = cpu_get_all(cpun); 1071 if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() && 1072 zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c)) 1073 return (NULL); 1074 return (c); 1075 } 1076 1077 /* 1078 * The following functions should be used to check CPU states in the kernel. 1079 * They should be invoked with cpu_lock held. Kernel subsystems interested 1080 * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc 1081 * states. Those are for user-land (and system call) use only. 1082 */ 1083 1084 /* 1085 * Determine whether the CPU is online and handling interrupts. 1086 */ 1087 int 1088 cpu_is_online(cpu_t *cpu) 1089 { 1090 ASSERT(MUTEX_HELD(&cpu_lock)); 1091 return (cpu_flagged_online(cpu->cpu_flags)); 1092 } 1093 1094 /* 1095 * Determine whether the CPU is offline (this includes spare and faulted). 1096 */ 1097 int 1098 cpu_is_offline(cpu_t *cpu) 1099 { 1100 ASSERT(MUTEX_HELD(&cpu_lock)); 1101 return (cpu_flagged_offline(cpu->cpu_flags)); 1102 } 1103 1104 /* 1105 * Determine whether the CPU is powered off. 1106 */ 1107 int 1108 cpu_is_poweredoff(cpu_t *cpu) 1109 { 1110 ASSERT(MUTEX_HELD(&cpu_lock)); 1111 return (cpu_flagged_poweredoff(cpu->cpu_flags)); 1112 } 1113 1114 /* 1115 * Determine whether the CPU is handling interrupts. 1116 */ 1117 int 1118 cpu_is_nointr(cpu_t *cpu) 1119 { 1120 ASSERT(MUTEX_HELD(&cpu_lock)); 1121 return (cpu_flagged_nointr(cpu->cpu_flags)); 1122 } 1123 1124 /* 1125 * Determine whether the CPU is active (scheduling threads). 1126 */ 1127 int 1128 cpu_is_active(cpu_t *cpu) 1129 { 1130 ASSERT(MUTEX_HELD(&cpu_lock)); 1131 return (cpu_flagged_active(cpu->cpu_flags)); 1132 } 1133 1134 /* 1135 * Same as above, but these require cpu_flags instead of cpu_t pointers. 
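 * In these terms a CPU is "active" when CPU_READY is set and none of
 * CPU_OFFLINE, CPU_POWEROFF, CPU_FAULTED or CPU_SPARE is; "online" is
 * active with CPU_ENABLE set (taking interrupts), and "nointr" is
 * active with CPU_ENABLE clear.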
1136 */ 1137 int 1138 cpu_flagged_online(cpu_flag_t cpu_flags) 1139 { 1140 return (cpu_flagged_active(cpu_flags) && 1141 (cpu_flags & CPU_ENABLE)); 1142 } 1143 1144 int 1145 cpu_flagged_offline(cpu_flag_t cpu_flags) 1146 { 1147 return (((cpu_flags & CPU_POWEROFF) == 0) && 1148 ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY)); 1149 } 1150 1151 int 1152 cpu_flagged_poweredoff(cpu_flag_t cpu_flags) 1153 { 1154 return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF); 1155 } 1156 1157 int 1158 cpu_flagged_nointr(cpu_flag_t cpu_flags) 1159 { 1160 return (cpu_flagged_active(cpu_flags) && 1161 (cpu_flags & CPU_ENABLE) == 0); 1162 } 1163 1164 int 1165 cpu_flagged_active(cpu_flag_t cpu_flags) 1166 { 1167 return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) && 1168 ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY)); 1169 } 1170 1171 /* 1172 * Bring the indicated CPU online. 1173 */ 1174 int 1175 cpu_online(cpu_t *cp) 1176 { 1177 int error = 0; 1178 1179 /* 1180 * Handle on-line request. 1181 * This code must put the new CPU on the active list before 1182 * starting it because it will not be paused, and will start 1183 * using the active list immediately. The real start occurs 1184 * when the CPU_QUIESCED flag is turned off. 1185 */ 1186 1187 ASSERT(MUTEX_HELD(&cpu_lock)); 1188 1189 /* 1190 * Put all the cpus into a known safe place. 1191 * No mutexes can be entered while CPUs are paused. 1192 */ 1193 error = mp_cpu_start(cp); /* arch-dep hook */ 1194 if (error == 0) { 1195 pg_cpupart_in(cp, cp->cpu_part); 1196 pause_cpus(NULL); 1197 cpu_add_active_internal(cp); 1198 if (cp->cpu_flags & CPU_FAULTED) { 1199 cp->cpu_flags &= ~CPU_FAULTED; 1200 mp_cpu_faulted_exit(cp); 1201 } 1202 cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN | 1203 CPU_SPARE); 1204 start_cpus(); 1205 cpu_stats_kstat_create(cp); 1206 cpu_create_intrstat(cp); 1207 lgrp_kstat_create(cp); 1208 cpu_state_change_notify(cp->cpu_id, CPU_ON); 1209 cpu_intr_enable(cp); /* arch-dep hook */ 1210 cpu_set_state(cp); 1211 cyclic_online(cp); 1212 /* 1213 * This has to be called only after cyclic_online(). This 1214 * function uses cyclics. 1215 */ 1216 callout_cpu_online(cp); 1217 poke_cpu(cp->cpu_id); 1218 } 1219 1220 return (error); 1221 } 1222 1223 /* 1224 * Take the indicated CPU offline. 1225 */ 1226 int 1227 cpu_offline(cpu_t *cp, int flags) 1228 { 1229 cpupart_t *pp; 1230 int error = 0; 1231 cpu_t *ncp; 1232 int intr_enable; 1233 int cyclic_off = 0; 1234 int loop_count; 1235 int no_quiesce = 0; 1236 int (*bound_func)(struct cpu *, int); 1237 kthread_t *t; 1238 lpl_t *cpu_lpl; 1239 proc_t *p; 1240 int lgrp_diff_lpl; 1241 boolean_t unbind_all_threads = (flags & CPU_FORCED) != 0; 1242 1243 ASSERT(MUTEX_HELD(&cpu_lock)); 1244 1245 /* 1246 * If we're going from faulted or spare to offline, just 1247 * clear these flags and update CPU state. 1248 */ 1249 if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) { 1250 if (cp->cpu_flags & CPU_FAULTED) { 1251 cp->cpu_flags &= ~CPU_FAULTED; 1252 mp_cpu_faulted_exit(cp); 1253 } 1254 cp->cpu_flags &= ~CPU_SPARE; 1255 cpu_set_state(cp); 1256 return (0); 1257 } 1258 1259 /* 1260 * Handle off-line request. 1261 */ 1262 pp = cp->cpu_part; 1263 /* 1264 * Don't offline last online CPU in partition 1265 */ 1266 if (ncpus_online <= 1 || pp->cp_ncpus <= 1 || cpu_intr_count(cp) < 2) 1267 return (EBUSY); 1268 /* 1269 * Unbind all soft-bound threads bound to our CPU and hard bound threads 1270 * if we were asked to. 
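	 * (CPU_FORCED in flags is what requests that hard bindings be
	 * broken as well.)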
	 */
	error = cpu_unbind(cp->cpu_id, unbind_all_threads);
	if (error != 0)
		return (error);
	/*
	 * We shouldn't be bound to this CPU ourselves.
	 */
	if (curthread->t_bound_cpu == cp)
		return (EBUSY);

	/*
	 * Tell interested parties that this CPU is going offline.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_OFF);

	/*
	 * Tell the PG subsystem that the CPU is leaving the partition
	 */
	pg_cpupart_out(cp, pp);

	/*
	 * Take the CPU out of interrupt participation so we won't find
	 * bound kernel threads.  If the architecture cannot completely
	 * shut off interrupts on the CPU, don't quiesce it, but don't
	 * run anything but interrupt thread... this is indicated by
	 * the CPU_OFFLINE flag being on but the CPU_QUIESCE flag being
	 * off.
	 */
	intr_enable = cp->cpu_flags & CPU_ENABLE;
	if (intr_enable)
		no_quiesce = cpu_intr_disable(cp);

	/*
	 * Record that we are aiming to offline this cpu.  This acts as
	 * a barrier to further weak binding requests in thread_nomigrate
	 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to
	 * lean away from this cpu.  Further strong bindings are already
	 * avoided since we hold cpu_lock.  Since threads that are set
	 * runnable around now and others coming off the target cpu are
	 * directed away from the target, existing strong and weak bindings
	 * (especially the latter) to the target cpu stand maximum chance of
	 * being able to unbind during the short delay loop below (if other
	 * unbound threads compete they may not see cpu in time to unbind
	 * even if they would do so immediately).
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Check for kernel threads (strong or weak) bound to that CPU.
	 * Strongly bound threads may not unbind, and we'll have to return
	 * EBUSY.  Weakly bound threads should always disappear - we've
	 * stopped more weak binding with cpu_inmotion and existing
	 * bindings will drain imminently (they may not block).  Nonetheless
	 * we will wait for a fixed period for all bound threads to disappear.
	 * Inactive interrupt threads are OK (they'll be in TS_FREE
	 * state).  If test finds some bound threads, wait a few ticks
	 * to give short-lived threads (such as interrupts) a chance to
	 * complete.  Note that if no_quiesce is set, i.e. this cpu
	 * is required to service interrupts, then we take the route
	 * that permits interrupt threads to be active (or bypassed).
	 */
	bound_func = no_quiesce ? disp_bound_threads : disp_bound_anythreads;

again:	for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) {
		if (loop_count >= 5) {
			error = EBUSY;	/* some threads still bound */
			break;
		}

		/*
		 * If some threads were assigned, give them
		 * a chance to complete or move.
		 *
		 * This assumes that the clock_thread is not bound
		 * to any CPU, because the clock_thread is needed to
		 * do the delay(hz/100).
		 *
		 * Note: we still hold the cpu_lock while waiting for
		 * the next clock tick.  This is OK since it isn't
		 * needed for anything else except processor_bind(2),
		 * and system initialization.  If we drop the lock,
		 * we would risk another p_online disabling the last
		 * processor.
1355 */ 1356 delay(hz/100); 1357 } 1358 1359 if (error == 0 && cyclic_off == 0) { 1360 if (!cyclic_offline(cp)) { 1361 /* 1362 * We must have bound cyclics... 1363 */ 1364 error = EBUSY; 1365 goto out; 1366 } 1367 cyclic_off = 1; 1368 } 1369 1370 /* 1371 * Call mp_cpu_stop() to perform any special operations 1372 * needed for this machine architecture to offline a CPU. 1373 */ 1374 if (error == 0) 1375 error = mp_cpu_stop(cp); /* arch-dep hook */ 1376 1377 /* 1378 * If that all worked, take the CPU offline and decrement 1379 * ncpus_online. 1380 */ 1381 if (error == 0) { 1382 /* 1383 * Put all the cpus into a known safe place. 1384 * No mutexes can be entered while CPUs are paused. 1385 */ 1386 pause_cpus(cp); 1387 /* 1388 * Repeat the operation, if necessary, to make sure that 1389 * all outstanding low-level interrupts run to completion 1390 * before we set the CPU_QUIESCED flag. It's also possible 1391 * that a thread has weak bound to the cpu despite our raising 1392 * cpu_inmotion above since it may have loaded that 1393 * value before the barrier became visible (this would have 1394 * to be the thread that was on the target cpu at the time 1395 * we raised the barrier). 1396 */ 1397 if ((!no_quiesce && cp->cpu_intr_actv != 0) || 1398 (*bound_func)(cp, 1)) { 1399 start_cpus(); 1400 (void) mp_cpu_start(cp); 1401 goto again; 1402 } 1403 ncp = cp->cpu_next_part; 1404 cpu_lpl = cp->cpu_lpl; 1405 ASSERT(cpu_lpl != NULL); 1406 1407 /* 1408 * Remove the CPU from the list of active CPUs. 1409 */ 1410 cpu_remove_active(cp); 1411 1412 /* 1413 * Walk the active process list and look for threads 1414 * whose home lgroup needs to be updated, or 1415 * the last CPU they run on is the one being offlined now. 1416 */ 1417 1418 ASSERT(curthread->t_cpu != cp); 1419 for (p = practive; p != NULL; p = p->p_next) { 1420 1421 t = p->p_tlist; 1422 1423 if (t == NULL) 1424 continue; 1425 1426 lgrp_diff_lpl = 0; 1427 1428 do { 1429 ASSERT(t->t_lpl != NULL); 1430 /* 1431 * Taking last CPU in lpl offline 1432 * Rehome thread if it is in this lpl 1433 * Otherwise, update the count of how many 1434 * threads are in this CPU's lgroup but have 1435 * a different lpl. 1436 */ 1437 1438 if (cpu_lpl->lpl_ncpu == 0) { 1439 if (t->t_lpl == cpu_lpl) 1440 lgrp_move_thread(t, 1441 lgrp_choose(t, 1442 t->t_cpupart), 0); 1443 else if (t->t_lpl->lpl_lgrpid == 1444 cpu_lpl->lpl_lgrpid) 1445 lgrp_diff_lpl++; 1446 } 1447 ASSERT(t->t_lpl->lpl_ncpu > 0); 1448 1449 /* 1450 * Update CPU last ran on if it was this CPU 1451 */ 1452 if (t->t_cpu == cp && t->t_bound_cpu != cp) 1453 t->t_cpu = disp_lowpri_cpu(ncp, 1454 t->t_lpl, t->t_pri, NULL); 1455 ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp || 1456 t->t_weakbound_cpu == cp); 1457 1458 t = t->t_forw; 1459 } while (t != p->p_tlist); 1460 1461 /* 1462 * Didn't find any threads in the same lgroup as this 1463 * CPU with a different lpl, so remove the lgroup from 1464 * the process lgroup bitmask. 1465 */ 1466 1467 if (lgrp_diff_lpl == 0) 1468 klgrpset_del(p->p_lgrpset, cpu_lpl->lpl_lgrpid); 1469 } 1470 1471 /* 1472 * Walk thread list looking for threads that need to be 1473 * rehomed, since there are some threads that are not in 1474 * their process's p_tlist. 1475 */ 1476 1477 t = curthread; 1478 do { 1479 ASSERT(t != NULL && t->t_lpl != NULL); 1480 1481 /* 1482 * Rehome threads with same lpl as this CPU when this 1483 * is the last CPU in the lpl. 
1484 */ 1485 1486 if ((cpu_lpl->lpl_ncpu == 0) && (t->t_lpl == cpu_lpl)) 1487 lgrp_move_thread(t, 1488 lgrp_choose(t, t->t_cpupart), 1); 1489 1490 ASSERT(t->t_lpl->lpl_ncpu > 0); 1491 1492 /* 1493 * Update CPU last ran on if it was this CPU 1494 */ 1495 1496 if (t->t_cpu == cp && t->t_bound_cpu != cp) { 1497 t->t_cpu = disp_lowpri_cpu(ncp, 1498 t->t_lpl, t->t_pri, NULL); 1499 } 1500 ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp || 1501 t->t_weakbound_cpu == cp); 1502 t = t->t_next; 1503 1504 } while (t != curthread); 1505 ASSERT((cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) == 0); 1506 cp->cpu_flags |= CPU_OFFLINE; 1507 disp_cpu_inactive(cp); 1508 if (!no_quiesce) 1509 cp->cpu_flags |= CPU_QUIESCED; 1510 ncpus_online--; 1511 cpu_set_state(cp); 1512 cpu_inmotion = NULL; 1513 start_cpus(); 1514 cpu_stats_kstat_destroy(cp); 1515 cpu_delete_intrstat(cp); 1516 lgrp_kstat_destroy(cp); 1517 } 1518 1519 out: 1520 cpu_inmotion = NULL; 1521 1522 /* 1523 * If we failed, re-enable interrupts. 1524 * Do this even if cpu_intr_disable returned an error, because 1525 * it may have partially disabled interrupts. 1526 */ 1527 if (error && intr_enable) 1528 cpu_intr_enable(cp); 1529 1530 /* 1531 * If we failed, but managed to offline the cyclic subsystem on this 1532 * CPU, bring it back online. 1533 */ 1534 if (error && cyclic_off) 1535 cyclic_online(cp); 1536 1537 /* 1538 * If we failed, tell the PG subsystem that the CPU is back 1539 */ 1540 pg_cpupart_in(cp, pp); 1541 1542 /* 1543 * If we failed, we need to notify everyone that this CPU is back on. 1544 */ 1545 if (error != 0) 1546 cpu_state_change_notify(cp->cpu_id, CPU_ON); 1547 1548 return (error); 1549 } 1550 1551 /* 1552 * Mark the indicated CPU as faulted, taking it offline. 1553 */ 1554 int 1555 cpu_faulted(cpu_t *cp, int flags) 1556 { 1557 int error = 0; 1558 1559 ASSERT(MUTEX_HELD(&cpu_lock)); 1560 ASSERT(!cpu_is_poweredoff(cp)); 1561 1562 if (cpu_is_offline(cp)) { 1563 cp->cpu_flags &= ~CPU_SPARE; 1564 cp->cpu_flags |= CPU_FAULTED; 1565 mp_cpu_faulted_enter(cp); 1566 cpu_set_state(cp); 1567 return (0); 1568 } 1569 1570 if ((error = cpu_offline(cp, flags)) == 0) { 1571 cp->cpu_flags |= CPU_FAULTED; 1572 mp_cpu_faulted_enter(cp); 1573 cpu_set_state(cp); 1574 } 1575 1576 return (error); 1577 } 1578 1579 /* 1580 * Mark the indicated CPU as a spare, taking it offline. 1581 */ 1582 int 1583 cpu_spare(cpu_t *cp, int flags) 1584 { 1585 int error = 0; 1586 1587 ASSERT(MUTEX_HELD(&cpu_lock)); 1588 ASSERT(!cpu_is_poweredoff(cp)); 1589 1590 if (cpu_is_offline(cp)) { 1591 if (cp->cpu_flags & CPU_FAULTED) { 1592 cp->cpu_flags &= ~CPU_FAULTED; 1593 mp_cpu_faulted_exit(cp); 1594 } 1595 cp->cpu_flags |= CPU_SPARE; 1596 cpu_set_state(cp); 1597 return (0); 1598 } 1599 1600 if ((error = cpu_offline(cp, flags)) == 0) { 1601 cp->cpu_flags |= CPU_SPARE; 1602 cpu_set_state(cp); 1603 } 1604 1605 return (error); 1606 } 1607 1608 /* 1609 * Take the indicated CPU from poweroff to offline. 1610 */ 1611 int 1612 cpu_poweron(cpu_t *cp) 1613 { 1614 int error = ENOTSUP; 1615 1616 ASSERT(MUTEX_HELD(&cpu_lock)); 1617 ASSERT(cpu_is_poweredoff(cp)); 1618 1619 error = mp_cpu_poweron(cp); /* arch-dep hook */ 1620 if (error == 0) 1621 cpu_set_state(cp); 1622 1623 return (error); 1624 } 1625 1626 /* 1627 * Take the indicated CPU from any inactive state to powered off. 
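 * The CPU must already be offline and fully quiesced (CPU_QUIESCED set);
 * otherwise EBUSY is returned before the arch-dependent hook is called.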
 */
int
cpu_poweroff(cpu_t *cp)
{
	int	error = ENOTSUP;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_is_offline(cp));

	if (!(cp->cpu_flags & CPU_QUIESCED))
		return (EBUSY);		/* not completely idle */

	error = mp_cpu_poweroff(cp);	/* arch-dep hook */
	if (error == 0)
		cpu_set_state(cp);

	return (error);
}

/*
 * Initialize the Sequential CPU id lookup table
 */
void
cpu_seq_tbl_init()
{
	cpu_t	**tbl;

	tbl = kmem_zalloc(sizeof (struct cpu *) * max_ncpus, KM_SLEEP);
	tbl[0] = CPU;

	cpu_seq = tbl;
}

/*
 * Initialize the CPU lists for the first CPU.
 */
void
cpu_list_init(cpu_t *cp)
{
	cp->cpu_next = cp;
	cp->cpu_prev = cp;
	cpu_list = cp;
	clock_cpu_list = cp;

	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_active = cp;

	cp->cpu_seqid = 0;
	CPUSET_ADD(cpu_seqid_inuse, 0);

	/*
	 * Bootstrap cpu_seq using cpu_list
	 * The cpu_seq[] table will be dynamically allocated
	 * when kmem later becomes available (but before going MP)
	 */
	cpu_seq = &cpu_list;

	cp->cpu_cache_offset = KMEM_CACHE_SIZE(cp->cpu_seqid);
	cp_default.cp_cpulist = cp;
	cp_default.cp_ncpus = 1;
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;
	cp->cpu_part = &cp_default;

	CPUSET_ADD(cpu_available, cp->cpu_id);
}

/*
 * Insert a CPU into the list of available CPUs.
 */
void
cpu_add_unit(cpu_t *cp)
{
	int seqid;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)cp, 0);

	/*
	 * Note: most users of the cpu_list will grab the
	 * cpu_lock to ensure that it isn't modified.  However,
	 * certain users can't or won't do that.  To allow this
	 * we pause the other cpus.  Users who walk the list
	 * without cpu_lock, must disable kernel preemption
	 * to ensure that the list isn't modified underneath
	 * them.  Also, any cached pointers to cpu structures
	 * must be revalidated by checking to see if the
	 * cpu_next pointer points to itself.  This check must
	 * be done with the cpu_lock held or kernel preemption
	 * disabled.  This check relies upon the fact that
	 * old cpu structures are not freed or cleared after
	 * they are removed from the cpu_list.
	 *
	 * Note that the clock code walks the cpu list dereferencing
	 * the cpu_part pointer, so we need to initialize it before
	 * adding the cpu to the list.
	 */
	cp->cpu_part = &cp_default;
	(void) pause_cpus(NULL);
	cp->cpu_next = cpu_list;
	cp->cpu_prev = cpu_list->cpu_prev;
	cpu_list->cpu_prev->cpu_next = cp;
	cpu_list->cpu_prev = cp;
	start_cpus();

	for (seqid = 0; CPU_IN_SET(cpu_seqid_inuse, seqid); seqid++)
		continue;
	CPUSET_ADD(cpu_seqid_inuse, seqid);
	cp->cpu_seqid = seqid;
	ASSERT(ncpus < max_ncpus);
	ncpus++;
	cp->cpu_cache_offset = KMEM_CACHE_SIZE(cp->cpu_seqid);
	cpu[cp->cpu_id] = cp;
	CPUSET_ADD(cpu_available, cp->cpu_id);
	cpu_seq[cp->cpu_seqid] = cp;

	/*
	 * allocate a pause thread for this CPU.
	 */
	cpu_pause_alloc(cp);

	/*
	 * So that new CPUs won't have NULL prev_onln and next_onln pointers,
	 * link them into a list of just that CPU.
1755 * This is so that disp_lowpri_cpu will work for thread_create in 1756 * pause_cpus() when called from the startup thread in a new CPU. 1757 */ 1758 cp->cpu_next_onln = cp; 1759 cp->cpu_prev_onln = cp; 1760 cpu_info_kstat_create(cp); 1761 cp->cpu_next_part = cp; 1762 cp->cpu_prev_part = cp; 1763 1764 init_cpu_mstate(cp, CMS_SYSTEM); 1765 1766 pool_pset_mod = gethrtime(); 1767 } 1768 1769 /* 1770 * Do the opposite of cpu_add_unit(). 1771 */ 1772 void 1773 cpu_del_unit(int cpuid) 1774 { 1775 struct cpu *cp, *cpnext; 1776 1777 ASSERT(MUTEX_HELD(&cpu_lock)); 1778 cp = cpu[cpuid]; 1779 ASSERT(cp != NULL); 1780 1781 ASSERT(cp->cpu_next_onln == cp); 1782 ASSERT(cp->cpu_prev_onln == cp); 1783 ASSERT(cp->cpu_next_part == cp); 1784 ASSERT(cp->cpu_prev_part == cp); 1785 1786 /* 1787 * Tear down the CPU's physical ID cache, and update any 1788 * processor groups 1789 */ 1790 pg_cpu_fini(cp); 1791 pghw_physid_destroy(cp); 1792 1793 /* 1794 * Destroy kstat stuff. 1795 */ 1796 cpu_info_kstat_destroy(cp); 1797 term_cpu_mstate(cp); 1798 /* 1799 * Free up pause thread. 1800 */ 1801 cpu_pause_free(cp); 1802 CPUSET_DEL(cpu_available, cp->cpu_id); 1803 cpu[cp->cpu_id] = NULL; 1804 cpu_seq[cp->cpu_seqid] = NULL; 1805 1806 /* 1807 * The clock thread and mutex_vector_enter cannot hold the 1808 * cpu_lock while traversing the cpu list, therefore we pause 1809 * all other threads by pausing the other cpus. These, and any 1810 * other routines holding cpu pointers while possibly sleeping 1811 * must be sure to call kpreempt_disable before processing the 1812 * list and be sure to check that the cpu has not been deleted 1813 * after any sleeps (check cp->cpu_next != NULL). We guarantee 1814 * to keep the deleted cpu structure around. 1815 * 1816 * Note that this MUST be done AFTER cpu_available 1817 * has been updated so that we don't waste time 1818 * trying to pause the cpu we're trying to delete. 1819 */ 1820 (void) pause_cpus(NULL); 1821 1822 cpnext = cp->cpu_next; 1823 cp->cpu_prev->cpu_next = cp->cpu_next; 1824 cp->cpu_next->cpu_prev = cp->cpu_prev; 1825 if (cp == cpu_list) 1826 cpu_list = cpnext; 1827 1828 /* 1829 * Signals that the cpu has been deleted (see above). 1830 */ 1831 cp->cpu_next = NULL; 1832 cp->cpu_prev = NULL; 1833 1834 start_cpus(); 1835 1836 CPUSET_DEL(cpu_seqid_inuse, cp->cpu_seqid); 1837 ncpus--; 1838 lgrp_config(LGRP_CONFIG_CPU_DEL, (uintptr_t)cp, 0); 1839 1840 pool_pset_mod = gethrtime(); 1841 } 1842 1843 /* 1844 * Add a CPU to the list of active CPUs. 1845 * This routine must not get any locks, because other CPUs are paused. 
1846 */ 1847 static void 1848 cpu_add_active_internal(cpu_t *cp) 1849 { 1850 cpupart_t *pp = cp->cpu_part; 1851 1852 ASSERT(MUTEX_HELD(&cpu_lock)); 1853 ASSERT(cpu_list != NULL); /* list started in cpu_list_init */ 1854 1855 ncpus_online++; 1856 cpu_set_state(cp); 1857 cp->cpu_next_onln = cpu_active; 1858 cp->cpu_prev_onln = cpu_active->cpu_prev_onln; 1859 cpu_active->cpu_prev_onln->cpu_next_onln = cp; 1860 cpu_active->cpu_prev_onln = cp; 1861 1862 if (pp->cp_cpulist) { 1863 cp->cpu_next_part = pp->cp_cpulist; 1864 cp->cpu_prev_part = pp->cp_cpulist->cpu_prev_part; 1865 pp->cp_cpulist->cpu_prev_part->cpu_next_part = cp; 1866 pp->cp_cpulist->cpu_prev_part = cp; 1867 } else { 1868 ASSERT(pp->cp_ncpus == 0); 1869 pp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp; 1870 } 1871 pp->cp_ncpus++; 1872 if (pp->cp_ncpus == 1) { 1873 cp_numparts_nonempty++; 1874 ASSERT(cp_numparts_nonempty != 0); 1875 } 1876 1877 pg_cpu_active(cp); 1878 lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0); 1879 1880 bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg)); 1881 } 1882 1883 /* 1884 * Add a CPU to the list of active CPUs. 1885 * This is called from machine-dependent layers when a new CPU is started. 1886 */ 1887 void 1888 cpu_add_active(cpu_t *cp) 1889 { 1890 pg_cpupart_in(cp, cp->cpu_part); 1891 1892 pause_cpus(NULL); 1893 cpu_add_active_internal(cp); 1894 start_cpus(); 1895 1896 cpu_stats_kstat_create(cp); 1897 cpu_create_intrstat(cp); 1898 lgrp_kstat_create(cp); 1899 cpu_state_change_notify(cp->cpu_id, CPU_INIT); 1900 } 1901 1902 1903 /* 1904 * Remove a CPU from the list of active CPUs. 1905 * This routine must not get any locks, because other CPUs are paused. 1906 */ 1907 /* ARGSUSED */ 1908 static void 1909 cpu_remove_active(cpu_t *cp) 1910 { 1911 cpupart_t *pp = cp->cpu_part; 1912 1913 ASSERT(MUTEX_HELD(&cpu_lock)); 1914 ASSERT(cp->cpu_next_onln != cp); /* not the last one */ 1915 ASSERT(cp->cpu_prev_onln != cp); /* not the last one */ 1916 1917 pg_cpu_inactive(cp); 1918 1919 lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0); 1920 1921 if (cp == clock_cpu_list) 1922 clock_cpu_list = cp->cpu_next_onln; 1923 1924 cp->cpu_prev_onln->cpu_next_onln = cp->cpu_next_onln; 1925 cp->cpu_next_onln->cpu_prev_onln = cp->cpu_prev_onln; 1926 if (cpu_active == cp) { 1927 cpu_active = cp->cpu_next_onln; 1928 } 1929 cp->cpu_next_onln = cp; 1930 cp->cpu_prev_onln = cp; 1931 1932 cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part; 1933 cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part; 1934 if (pp->cp_cpulist == cp) { 1935 pp->cp_cpulist = cp->cpu_next_part; 1936 ASSERT(pp->cp_cpulist != cp); 1937 } 1938 cp->cpu_next_part = cp; 1939 cp->cpu_prev_part = cp; 1940 pp->cp_ncpus--; 1941 if (pp->cp_ncpus == 0) { 1942 cp_numparts_nonempty--; 1943 ASSERT(cp_numparts_nonempty != 0); 1944 } 1945 } 1946 1947 /* 1948 * Routine used to setup a newly inserted CPU in preparation for starting 1949 * it running code. 1950 */ 1951 int 1952 cpu_configure(int cpuid) 1953 { 1954 int retval = 0; 1955 1956 ASSERT(MUTEX_HELD(&cpu_lock)); 1957 1958 /* 1959 * Some structures are statically allocated based upon 1960 * the maximum number of cpus the system supports. Do not 1961 * try to add anything beyond this limit. 
1962 */ 1963 if (cpuid < 0 || cpuid >= NCPU) { 1964 return (EINVAL); 1965 } 1966 1967 if ((cpu[cpuid] != NULL) && (cpu[cpuid]->cpu_flags != 0)) { 1968 return (EALREADY); 1969 } 1970 1971 if ((retval = mp_cpu_configure(cpuid)) != 0) { 1972 return (retval); 1973 } 1974 1975 cpu[cpuid]->cpu_flags = CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF; 1976 cpu_set_state(cpu[cpuid]); 1977 retval = cpu_state_change_hooks(cpuid, CPU_CONFIG, CPU_UNCONFIG); 1978 if (retval != 0) 1979 (void) mp_cpu_unconfigure(cpuid); 1980 1981 return (retval); 1982 } 1983 1984 /* 1985 * Routine used to cleanup a CPU that has been powered off. This will 1986 * destroy all per-cpu information related to this cpu. 1987 */ 1988 int 1989 cpu_unconfigure(int cpuid) 1990 { 1991 int error; 1992 1993 ASSERT(MUTEX_HELD(&cpu_lock)); 1994 1995 if (cpu[cpuid] == NULL) { 1996 return (ENODEV); 1997 } 1998 1999 if (cpu[cpuid]->cpu_flags == 0) { 2000 return (EALREADY); 2001 } 2002 2003 if ((cpu[cpuid]->cpu_flags & CPU_POWEROFF) == 0) { 2004 return (EBUSY); 2005 } 2006 2007 if (cpu[cpuid]->cpu_props != NULL) { 2008 (void) nvlist_free(cpu[cpuid]->cpu_props); 2009 cpu[cpuid]->cpu_props = NULL; 2010 } 2011 2012 error = cpu_state_change_hooks(cpuid, CPU_UNCONFIG, CPU_CONFIG); 2013 2014 if (error != 0) 2015 return (error); 2016 2017 return (mp_cpu_unconfigure(cpuid)); 2018 } 2019 2020 /* 2021 * Routines for registering and de-registering cpu_setup callback functions. 2022 * 2023 * Caller's context 2024 * These routines must not be called from a driver's attach(9E) or 2025 * detach(9E) entry point. 2026 * 2027 * NOTE: CPU callbacks should not block. They are called with cpu_lock held. 2028 */ 2029 2030 /* 2031 * Ideally, these would be dynamically allocated and put into a linked 2032 * list; however that is not feasible because the registration routine 2033 * has to be available before the kmem allocator is working (in fact, 2034 * it is called by the kmem allocator init code). In any case, there 2035 * are quite a few extra entries for future users. 2036 */ 2037 #define NCPU_SETUPS 20 2038 2039 struct cpu_setup { 2040 cpu_setup_func_t *func; 2041 void *arg; 2042 } cpu_setups[NCPU_SETUPS]; 2043 2044 void 2045 register_cpu_setup_func(cpu_setup_func_t *func, void *arg) 2046 { 2047 int i; 2048 2049 ASSERT(MUTEX_HELD(&cpu_lock)); 2050 2051 for (i = 0; i < NCPU_SETUPS; i++) 2052 if (cpu_setups[i].func == NULL) 2053 break; 2054 if (i >= NCPU_SETUPS) 2055 cmn_err(CE_PANIC, "Ran out of cpu_setup callback entries"); 2056 2057 cpu_setups[i].func = func; 2058 cpu_setups[i].arg = arg; 2059 } 2060 2061 void 2062 unregister_cpu_setup_func(cpu_setup_func_t *func, void *arg) 2063 { 2064 int i; 2065 2066 ASSERT(MUTEX_HELD(&cpu_lock)); 2067 2068 for (i = 0; i < NCPU_SETUPS; i++) 2069 if ((cpu_setups[i].func == func) && 2070 (cpu_setups[i].arg == arg)) 2071 break; 2072 if (i >= NCPU_SETUPS) 2073 cmn_err(CE_PANIC, "Could not find cpu_setup callback to " 2074 "deregister"); 2075 2076 cpu_setups[i].func = NULL; 2077 cpu_setups[i].arg = 0; 2078 } 2079 2080 /* 2081 * Call any state change hooks for this CPU, ignore any errors. 2082 */ 2083 void 2084 cpu_state_change_notify(int id, cpu_setup_t what) 2085 { 2086 int i; 2087 2088 ASSERT(MUTEX_HELD(&cpu_lock)); 2089 2090 for (i = 0; i < NCPU_SETUPS; i++) { 2091 if (cpu_setups[i].func != NULL) { 2092 cpu_setups[i].func(what, id, cpu_setups[i].arg); 2093 } 2094 } 2095 } 2096 2097 /* 2098 * Call any state change hooks for this CPU, undo it if error found. 
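 *
 * The hooks are the cpu_setup callbacks registered above; they run in
 * registration order, and if one returns non-zero the callbacks that
 * already ran are invoked again with the "undo" event so they can back
 * out (a non-zero return from a CPU_CONFIG hook, for example, vetoes
 * cpu_configure()). A consumer might look like the following hedged
 * sketch, where my_cpu_event(), my_resources_ready() and my_arg are
 * purely illustrative names:
 *
 *	static int
 *	my_cpu_event(cpu_setup_t what, int id, void *arg)
 *	{
 *		if (what == CPU_CONFIG && !my_resources_ready(id))
 *			return (EINVAL);
 *		return (0);
 *	}
 *
 *	mutex_enter(&cpu_lock);
 *	register_cpu_setup_func(my_cpu_event, my_arg);
 *	mutex_exit(&cpu_lock);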
2099 */ 2100 static int 2101 cpu_state_change_hooks(int id, cpu_setup_t what, cpu_setup_t undo) 2102 { 2103 int i; 2104 int retval = 0; 2105 2106 ASSERT(MUTEX_HELD(&cpu_lock)); 2107 2108 for (i = 0; i < NCPU_SETUPS; i++) { 2109 if (cpu_setups[i].func != NULL) { 2110 retval = cpu_setups[i].func(what, id, 2111 cpu_setups[i].arg); 2112 if (retval) { 2113 for (i--; i >= 0; i--) { 2114 if (cpu_setups[i].func != NULL) 2115 cpu_setups[i].func(undo, 2116 id, cpu_setups[i].arg); 2117 } 2118 break; 2119 } 2120 } 2121 } 2122 return (retval); 2123 } 2124 2125 /* 2126 * Export information about this CPU via the kstat mechanism. 2127 */ 2128 static struct { 2129 kstat_named_t ci_state; 2130 kstat_named_t ci_state_begin; 2131 kstat_named_t ci_cpu_type; 2132 kstat_named_t ci_fpu_type; 2133 kstat_named_t ci_clock_MHz; 2134 kstat_named_t ci_chip_id; 2135 kstat_named_t ci_implementation; 2136 kstat_named_t ci_brandstr; 2137 kstat_named_t ci_core_id; 2138 kstat_named_t ci_curr_clock_Hz; 2139 kstat_named_t ci_supp_freq_Hz; 2140 #if defined(__sparcv9) 2141 kstat_named_t ci_device_ID; 2142 kstat_named_t ci_cpu_fru; 2143 #endif 2144 #if defined(__x86) 2145 kstat_named_t ci_vendorstr; 2146 kstat_named_t ci_family; 2147 kstat_named_t ci_model; 2148 kstat_named_t ci_step; 2149 kstat_named_t ci_clogid; 2150 kstat_named_t ci_pkg_core_id; 2151 kstat_named_t ci_ncpuperchip; 2152 kstat_named_t ci_ncoreperchip; 2153 #endif 2154 } cpu_info_template = { 2155 { "state", KSTAT_DATA_CHAR }, 2156 { "state_begin", KSTAT_DATA_LONG }, 2157 { "cpu_type", KSTAT_DATA_CHAR }, 2158 { "fpu_type", KSTAT_DATA_CHAR }, 2159 { "clock_MHz", KSTAT_DATA_LONG }, 2160 { "chip_id", KSTAT_DATA_LONG }, 2161 { "implementation", KSTAT_DATA_STRING }, 2162 { "brand", KSTAT_DATA_STRING }, 2163 { "core_id", KSTAT_DATA_LONG }, 2164 { "current_clock_Hz", KSTAT_DATA_UINT64 }, 2165 { "supported_frequencies_Hz", KSTAT_DATA_STRING }, 2166 #if defined(__sparcv9) 2167 { "device_ID", KSTAT_DATA_UINT64 }, 2168 { "cpu_fru", KSTAT_DATA_STRING }, 2169 #endif 2170 #if defined(__x86) 2171 { "vendor_id", KSTAT_DATA_STRING }, 2172 { "family", KSTAT_DATA_INT32 }, 2173 { "model", KSTAT_DATA_INT32 }, 2174 { "stepping", KSTAT_DATA_INT32 }, 2175 { "clog_id", KSTAT_DATA_INT32 }, 2176 { "pkg_core_id", KSTAT_DATA_LONG }, 2177 { "ncpu_per_chip", KSTAT_DATA_INT32 }, 2178 { "ncore_per_chip", KSTAT_DATA_INT32 }, 2179 #endif 2180 }; 2181 2182 static kmutex_t cpu_info_template_lock; 2183 2184 static int 2185 cpu_info_kstat_update(kstat_t *ksp, int rw) 2186 { 2187 cpu_t *cp = ksp->ks_private; 2188 const char *pi_state; 2189 2190 if (rw == KSTAT_WRITE) 2191 return (EACCES); 2192 2193 switch (cp->cpu_type_info.pi_state) { 2194 case P_ONLINE: 2195 pi_state = PS_ONLINE; 2196 break; 2197 case P_POWEROFF: 2198 pi_state = PS_POWEROFF; 2199 break; 2200 case P_NOINTR: 2201 pi_state = PS_NOINTR; 2202 break; 2203 case P_FAULTED: 2204 pi_state = PS_FAULTED; 2205 break; 2206 case P_SPARE: 2207 pi_state = PS_SPARE; 2208 break; 2209 case P_OFFLINE: 2210 pi_state = PS_OFFLINE; 2211 break; 2212 default: 2213 pi_state = "unknown"; 2214 } 2215 (void) strcpy(cpu_info_template.ci_state.value.c, pi_state); 2216 cpu_info_template.ci_state_begin.value.l = cp->cpu_state_begin; 2217 (void) strncpy(cpu_info_template.ci_cpu_type.value.c, 2218 cp->cpu_type_info.pi_processor_type, 15); 2219 (void) strncpy(cpu_info_template.ci_fpu_type.value.c, 2220 cp->cpu_type_info.pi_fputypes, 15); 2221 cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock; 2222 cpu_info_template.ci_chip_id.value.l = 2223 
pg_plat_hw_instance_id(cp, PGHW_CHIP); 2224 kstat_named_setstr(&cpu_info_template.ci_implementation, 2225 cp->cpu_idstr); 2226 kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr); 2227 cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp); 2228 cpu_info_template.ci_curr_clock_Hz.value.ui64 = 2229 cp->cpu_curr_clock; 2230 kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz, 2231 cp->cpu_supp_freqs); 2232 #if defined(__sparcv9) 2233 cpu_info_template.ci_device_ID.value.ui64 = 2234 cpunodes[cp->cpu_id].device_id; 2235 kstat_named_setstr(&cpu_info_template.ci_cpu_fru, cpu_fru_fmri(cp)); 2236 #endif 2237 #if defined(__x86) 2238 kstat_named_setstr(&cpu_info_template.ci_vendorstr, 2239 cpuid_getvendorstr(cp)); 2240 cpu_info_template.ci_family.value.l = cpuid_getfamily(cp); 2241 cpu_info_template.ci_model.value.l = cpuid_getmodel(cp); 2242 cpu_info_template.ci_step.value.l = cpuid_getstep(cp); 2243 cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp); 2244 cpu_info_template.ci_ncpuperchip.value.l = cpuid_get_ncpu_per_chip(cp); 2245 cpu_info_template.ci_ncoreperchip.value.l = 2246 cpuid_get_ncore_per_chip(cp); 2247 cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp); 2248 #endif 2249 2250 return (0); 2251 } 2252 2253 static void 2254 cpu_info_kstat_create(cpu_t *cp) 2255 { 2256 zoneid_t zoneid; 2257 2258 ASSERT(MUTEX_HELD(&cpu_lock)); 2259 2260 if (pool_pset_enabled()) 2261 zoneid = GLOBAL_ZONEID; 2262 else 2263 zoneid = ALL_ZONES; 2264 if ((cp->cpu_info_kstat = kstat_create_zone("cpu_info", cp->cpu_id, 2265 NULL, "misc", KSTAT_TYPE_NAMED, 2266 sizeof (cpu_info_template) / sizeof (kstat_named_t), 2267 KSTAT_FLAG_VIRTUAL, zoneid)) != NULL) { 2268 cp->cpu_info_kstat->ks_data_size += 2 * CPU_IDSTRLEN; 2269 #if defined(__sparcv9) 2270 cp->cpu_info_kstat->ks_data_size += 2271 strlen(cpu_fru_fmri(cp)) + 1; 2272 #endif 2273 #if defined(__x86) 2274 cp->cpu_info_kstat->ks_data_size += X86_VENDOR_STRLEN; 2275 #endif 2276 if (cp->cpu_supp_freqs != NULL) 2277 cp->cpu_info_kstat->ks_data_size += 2278 strlen(cp->cpu_supp_freqs) + 1; 2279 cp->cpu_info_kstat->ks_lock = &cpu_info_template_lock; 2280 cp->cpu_info_kstat->ks_data = &cpu_info_template; 2281 cp->cpu_info_kstat->ks_private = cp; 2282 cp->cpu_info_kstat->ks_update = cpu_info_kstat_update; 2283 kstat_install(cp->cpu_info_kstat); 2284 } 2285 } 2286 2287 static void 2288 cpu_info_kstat_destroy(cpu_t *cp) 2289 { 2290 ASSERT(MUTEX_HELD(&cpu_lock)); 2291 2292 kstat_delete(cp->cpu_info_kstat); 2293 cp->cpu_info_kstat = NULL; 2294 } 2295 2296 /* 2297 * Create and install kstats for the boot CPU. 2298 */ 2299 void 2300 cpu_kstat_init(cpu_t *cp) 2301 { 2302 mutex_enter(&cpu_lock); 2303 cpu_info_kstat_create(cp); 2304 cpu_stats_kstat_create(cp); 2305 cpu_create_intrstat(cp); 2306 cpu_set_state(cp); 2307 mutex_exit(&cpu_lock); 2308 } 2309 2310 /* 2311 * Make visible to the zone that subset of the cpu information that would be 2312 * initialized when a cpu is configured (but still offline). 2313 */ 2314 void 2315 cpu_visibility_configure(cpu_t *cp, zone_t *zone) 2316 { 2317 zoneid_t zoneid = zone ? 
zone->zone_id : ALL_ZONES; 2318 2319 ASSERT(MUTEX_HELD(&cpu_lock)); 2320 ASSERT(pool_pset_enabled()); 2321 ASSERT(cp != NULL); 2322 2323 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2324 zone->zone_ncpus++; 2325 ASSERT(zone->zone_ncpus <= ncpus); 2326 } 2327 if (cp->cpu_info_kstat != NULL) 2328 kstat_zone_add(cp->cpu_info_kstat, zoneid); 2329 } 2330 2331 /* 2332 * Make visible to the zone that subset of the cpu information that would be 2333 * initialized when a previously configured cpu is onlined. 2334 */ 2335 void 2336 cpu_visibility_online(cpu_t *cp, zone_t *zone) 2337 { 2338 kstat_t *ksp; 2339 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */ 2340 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2341 processorid_t cpun; 2342 2343 ASSERT(MUTEX_HELD(&cpu_lock)); 2344 ASSERT(pool_pset_enabled()); 2345 ASSERT(cp != NULL); 2346 ASSERT(cpu_is_active(cp)); 2347 2348 cpun = cp->cpu_id; 2349 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2350 zone->zone_ncpus_online++; 2351 ASSERT(zone->zone_ncpus_online <= ncpus_online); 2352 } 2353 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun); 2354 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES)) 2355 != NULL) { 2356 kstat_zone_add(ksp, zoneid); 2357 kstat_rele(ksp); 2358 } 2359 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) { 2360 kstat_zone_add(ksp, zoneid); 2361 kstat_rele(ksp); 2362 } 2363 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) { 2364 kstat_zone_add(ksp, zoneid); 2365 kstat_rele(ksp); 2366 } 2367 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) != 2368 NULL) { 2369 kstat_zone_add(ksp, zoneid); 2370 kstat_rele(ksp); 2371 } 2372 } 2373 2374 /* 2375 * Update relevant kstats such that cpu is now visible to processes 2376 * executing in specified zone. 2377 */ 2378 void 2379 cpu_visibility_add(cpu_t *cp, zone_t *zone) 2380 { 2381 cpu_visibility_configure(cp, zone); 2382 if (cpu_is_active(cp)) 2383 cpu_visibility_online(cp, zone); 2384 } 2385 2386 /* 2387 * Make invisible to the zone that subset of the cpu information that would be 2388 * torn down when a previously offlined cpu is unconfigured. 2389 */ 2390 void 2391 cpu_visibility_unconfigure(cpu_t *cp, zone_t *zone) 2392 { 2393 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2394 2395 ASSERT(MUTEX_HELD(&cpu_lock)); 2396 ASSERT(pool_pset_enabled()); 2397 ASSERT(cp != NULL); 2398 2399 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2400 ASSERT(zone->zone_ncpus != 0); 2401 zone->zone_ncpus--; 2402 } 2403 if (cp->cpu_info_kstat) 2404 kstat_zone_remove(cp->cpu_info_kstat, zoneid); 2405 } 2406 2407 /* 2408 * Make invisible to the zone that subset of the cpu information that would be 2409 * torn down when a cpu is offlined (but still configured). 2410 */ 2411 void 2412 cpu_visibility_offline(cpu_t *cp, zone_t *zone) 2413 { 2414 kstat_t *ksp; 2415 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */ 2416 zoneid_t zoneid = zone ? 
zone->zone_id : ALL_ZONES; 2417 processorid_t cpun; 2418 2419 ASSERT(MUTEX_HELD(&cpu_lock)); 2420 ASSERT(pool_pset_enabled()); 2421 ASSERT(cp != NULL); 2422 ASSERT(cpu_is_active(cp)); 2423 2424 cpun = cp->cpu_id; 2425 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2426 ASSERT(zone->zone_ncpus_online != 0); 2427 zone->zone_ncpus_online--; 2428 } 2429 2430 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) != 2431 NULL) { 2432 kstat_zone_remove(ksp, zoneid); 2433 kstat_rele(ksp); 2434 } 2435 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) { 2436 kstat_zone_remove(ksp, zoneid); 2437 kstat_rele(ksp); 2438 } 2439 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) { 2440 kstat_zone_remove(ksp, zoneid); 2441 kstat_rele(ksp); 2442 } 2443 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun); 2444 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES)) 2445 != NULL) { 2446 kstat_zone_remove(ksp, zoneid); 2447 kstat_rele(ksp); 2448 } 2449 } 2450 2451 /* 2452 * Update relevant kstats such that cpu is no longer visible to processes 2453 * executing in specified zone. 2454 */ 2455 void 2456 cpu_visibility_remove(cpu_t *cp, zone_t *zone) 2457 { 2458 if (cpu_is_active(cp)) 2459 cpu_visibility_offline(cp, zone); 2460 cpu_visibility_unconfigure(cp, zone); 2461 } 2462 2463 /* 2464 * Bind a thread to a CPU as requested. 2465 */ 2466 int 2467 cpu_bind_thread(kthread_id_t tp, processorid_t bind, processorid_t *obind, 2468 int *error) 2469 { 2470 processorid_t binding; 2471 cpu_t *cp = NULL; 2472 2473 ASSERT(MUTEX_HELD(&cpu_lock)); 2474 ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock)); 2475 2476 thread_lock(tp); 2477 2478 /* 2479 * Record old binding, but change the obind, which was initialized 2480 * to PBIND_NONE, only if this thread has a binding. This avoids 2481 * reporting PBIND_NONE for a process when some LWPs are bound. 2482 */ 2483 binding = tp->t_bind_cpu; 2484 if (binding != PBIND_NONE) 2485 *obind = binding; /* record old binding */ 2486 2487 switch (bind) { 2488 case PBIND_QUERY: 2489 /* Just return the old binding */ 2490 thread_unlock(tp); 2491 return (0); 2492 2493 case PBIND_QUERY_TYPE: 2494 /* Return the binding type */ 2495 *obind = TB_CPU_IS_SOFT(tp) ? PBIND_SOFT : PBIND_HARD; 2496 thread_unlock(tp); 2497 return (0); 2498 2499 case PBIND_SOFT: 2500 /* 2501 * Set soft binding for this thread and return the actual 2502 * binding 2503 */ 2504 TB_CPU_SOFT_SET(tp); 2505 thread_unlock(tp); 2506 return (0); 2507 2508 case PBIND_HARD: 2509 /* 2510 * Set hard binding for this thread and return the actual 2511 * binding 2512 */ 2513 TB_CPU_HARD_SET(tp); 2514 thread_unlock(tp); 2515 return (0); 2516 2517 default: 2518 break; 2519 } 2520 2521 /* 2522 * If this thread/LWP cannot be bound because of permission 2523 * problems, just note that and return success so that the 2524 * other threads/LWPs will be bound. This is the way 2525 * processor_bind() is defined to work. 2526 * 2527 * Binding will get EPERM if the thread is of system class 2528 * or hasprocperm() fails. 2529 */ 2530 if (tp->t_cid == 0 || !hasprocperm(tp->t_cred, CRED())) { 2531 *error = EPERM; 2532 thread_unlock(tp); 2533 return (0); 2534 } 2535 2536 binding = bind; 2537 if (binding != PBIND_NONE) { 2538 cp = cpu_get((processorid_t)binding); 2539 /* 2540 * Make sure binding is valid and is in right partition. 
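 *
 * From userland this path is reached through processor_bind(2), and a
 * target CPU that does not exist or that lives in another partition is
 * reported back as EINVAL. An illustrative userland fragment (the CPU
 * id 2 is just a placeholder):
 *
 *	processorid_t obind;
 *
 *	if (processor_bind(P_LWPID, P_MYID, 2, &obind) == -1)
 *		perror("processor_bind");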
2541 */ 2542 if (cp == NULL || tp->t_cpupart != cp->cpu_part) { 2543 *error = EINVAL; 2544 thread_unlock(tp); 2545 return (0); 2546 } 2547 } 2548 tp->t_bind_cpu = binding; /* set new binding */ 2549 2550 /* 2551 * If there is no system-set reason for affinity, set 2552 * the t_bound_cpu field to reflect the binding. 2553 */ 2554 if (tp->t_affinitycnt == 0) { 2555 if (binding == PBIND_NONE) { 2556 /* 2557 * We may need to adjust disp_max_unbound_pri 2558 * since we're becoming unbound. 2559 */ 2560 disp_adjust_unbound_pri(tp); 2561 2562 tp->t_bound_cpu = NULL; /* set new binding */ 2563 2564 /* 2565 * Move thread to lgroup with strongest affinity 2566 * after unbinding 2567 */ 2568 if (tp->t_lgrp_affinity) 2569 lgrp_move_thread(tp, 2570 lgrp_choose(tp, tp->t_cpupart), 1); 2571 2572 if (tp->t_state == TS_ONPROC && 2573 tp->t_cpu->cpu_part != tp->t_cpupart) 2574 cpu_surrender(tp); 2575 } else { 2576 lpl_t *lpl; 2577 2578 tp->t_bound_cpu = cp; 2579 ASSERT(cp->cpu_lpl != NULL); 2580 2581 /* 2582 * Set home to lgroup with most affinity containing CPU 2583 * that thread is being bound or minimum bounding 2584 * lgroup if no affinities set 2585 */ 2586 if (tp->t_lgrp_affinity) 2587 lpl = lgrp_affinity_best(tp, tp->t_cpupart, 2588 LGRP_NONE, B_FALSE); 2589 else 2590 lpl = cp->cpu_lpl; 2591 2592 if (tp->t_lpl != lpl) { 2593 /* can't grab cpu_lock */ 2594 lgrp_move_thread(tp, lpl, 1); 2595 } 2596 2597 /* 2598 * Make the thread switch to the bound CPU. 2599 * If the thread is runnable, we need to 2600 * requeue it even if t_cpu is already set 2601 * to the right CPU, since it may be on a 2602 * kpreempt queue and need to move to a local 2603 * queue. We could check t_disp_queue to 2604 * avoid unnecessary overhead if it's already 2605 * on the right queue, but since this isn't 2606 * a performance-critical operation it doesn't 2607 * seem worth the extra code and complexity. 2608 * 2609 * If the thread is weakbound to the cpu then it will 2610 * resist the new binding request until the weak 2611 * binding drops. The cpu_surrender or requeueing 2612 * below could be skipped in such cases (since it 2613 * will have no effect), but that would require 2614 * thread_allowmigrate to acquire thread_lock so 2615 * we'll take the very occasional hit here instead. 2616 */ 2617 if (tp->t_state == TS_ONPROC) { 2618 cpu_surrender(tp); 2619 } else if (tp->t_state == TS_RUN) { 2620 cpu_t *ocp = tp->t_cpu; 2621 2622 (void) dispdeq(tp); 2623 setbackdq(tp); 2624 /* 2625 * Either on the bound CPU's disp queue now, 2626 * or swapped out or on the swap queue. 2627 */ 2628 ASSERT(tp->t_disp_queue == cp->cpu_disp || 2629 tp->t_weakbound_cpu == ocp || 2630 (tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) 2631 != TS_LOAD); 2632 } 2633 } 2634 } 2635 2636 /* 2637 * Our binding has changed; set TP_CHANGEBIND. 2638 */ 2639 tp->t_proc_flag |= TP_CHANGEBIND; 2640 aston(tp); 2641 2642 thread_unlock(tp); 2643 2644 return (0); 2645 } 2646 2647 #if CPUSET_WORDS > 1 2648 2649 /* 2650 * Functions for implementing cpuset operations when a cpuset is more 2651 * than one word. On platforms where a cpuset is a single word these 2652 * are implemented as macros in cpuvar.h. 
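 *
 * Callers manipulate cpusets through the CPUSET_*() macros either way,
 * so code is not supposed to care which representation is in effect.
 * A minimal sketch (cp is an arbitrary cpu_t pointer here):
 *
 *	cpuset_t set;
 *
 *	CPUSET_ZERO(set);
 *	CPUSET_ADD(set, cp->cpu_id);
 *	CPUSET_DEL(set, cp->cpu_id);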
2653 */ 2654 2655 void 2656 cpuset_all(cpuset_t *s) 2657 { 2658 int i; 2659 2660 for (i = 0; i < CPUSET_WORDS; i++) 2661 s->cpub[i] = ~0UL; 2662 } 2663 2664 void 2665 cpuset_all_but(cpuset_t *s, uint_t cpu) 2666 { 2667 cpuset_all(s); 2668 CPUSET_DEL(*s, cpu); 2669 } 2670 2671 void 2672 cpuset_only(cpuset_t *s, uint_t cpu) 2673 { 2674 CPUSET_ZERO(*s); 2675 CPUSET_ADD(*s, cpu); 2676 } 2677 2678 int 2679 cpuset_isnull(cpuset_t *s) 2680 { 2681 int i; 2682 2683 for (i = 0; i < CPUSET_WORDS; i++) 2684 if (s->cpub[i] != 0) 2685 return (0); 2686 return (1); 2687 } 2688 2689 int 2690 cpuset_cmp(cpuset_t *s1, cpuset_t *s2) 2691 { 2692 int i; 2693 2694 for (i = 0; i < CPUSET_WORDS; i++) 2695 if (s1->cpub[i] != s2->cpub[i]) 2696 return (0); 2697 return (1); 2698 } 2699 2700 uint_t 2701 cpuset_find(cpuset_t *s) 2702 { 2703 2704 uint_t i; 2705 uint_t cpu = (uint_t)-1; 2706 2707 /* 2708 * Find a cpu in the cpuset 2709 */ 2710 for (i = 0; i < CPUSET_WORDS; i++) { 2711 cpu = (uint_t)(lowbit(s->cpub[i]) - 1); 2712 if (cpu != (uint_t)-1) { 2713 cpu += i * BT_NBIPUL; 2714 break; 2715 } 2716 } 2717 return (cpu); 2718 } 2719 2720 void 2721 cpuset_bounds(cpuset_t *s, uint_t *smallestid, uint_t *largestid) 2722 { 2723 int i, j; 2724 uint_t bit; 2725 2726 /* 2727 * First, find the smallest cpu id in the set. 2728 */ 2729 for (i = 0; i < CPUSET_WORDS; i++) { 2730 if (s->cpub[i] != 0) { 2731 bit = (uint_t)(lowbit(s->cpub[i]) - 1); 2732 ASSERT(bit != (uint_t)-1); 2733 *smallestid = bit + (i * BT_NBIPUL); 2734 2735 /* 2736 * Now find the largest cpu id in 2737 * the set and return immediately. 2738 * Done in an inner loop to avoid 2739 * having to break out of the first 2740 * loop. 2741 */ 2742 for (j = CPUSET_WORDS - 1; j >= i; j--) { 2743 if (s->cpub[j] != 0) { 2744 bit = (uint_t)(highbit(s->cpub[j]) - 1); 2745 ASSERT(bit != (uint_t)-1); 2746 *largestid = bit + (j * BT_NBIPUL); 2747 ASSERT(*largestid >= *smallestid); 2748 return; 2749 } 2750 } 2751 2752 /* 2753 * If this code is reached, a 2754 * smallestid was found, but not a 2755 * largestid. The cpuset must have 2756 * been changed during the course 2757 * of this function call. 2758 */ 2759 ASSERT(0); 2760 } 2761 } 2762 *smallestid = *largestid = CPUSET_NOTINSET; 2763 } 2764 2765 #endif /* CPUSET_WORDS */ 2766 2767 /* 2768 * Unbind threads bound to specified CPU. 2769 * 2770 * If `unbind_all_threads' is true, unbind all user threads bound to a given 2771 * CPU. Otherwise unbind all soft-bound user threads. 2772 */ 2773 int 2774 cpu_unbind(processorid_t cpu, boolean_t unbind_all_threads) 2775 { 2776 processorid_t obind; 2777 kthread_t *tp; 2778 int ret = 0; 2779 proc_t *pp; 2780 int err, berr = 0; 2781 2782 ASSERT(MUTEX_HELD(&cpu_lock)); 2783 2784 mutex_enter(&pidlock); 2785 for (pp = practive; pp != NULL; pp = pp->p_next) { 2786 mutex_enter(&pp->p_lock); 2787 tp = pp->p_tlist; 2788 /* 2789 * Skip zombies, kernel processes, and processes in 2790 * other zones, if called from a non-global zone. 2791 */ 2792 if (tp == NULL || (pp->p_flag & SSYS) || 2793 !HASZONEACCESS(curproc, pp->p_zone->zone_id)) { 2794 mutex_exit(&pp->p_lock); 2795 continue; 2796 } 2797 do { 2798 if (tp->t_bind_cpu != cpu) 2799 continue; 2800 /* 2801 * Skip threads with hard binding when 2802 * `unbind_all_threads' is not specified. 
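 * The hard/soft distinction is recorded when the binding type is set
 * through cpu_bind_thread() above, e.g. this hedged in-kernel sketch
 * (cpu_lock and the target process's p_lock must already be held):
 *
 *	err = cpu_bind_thread(tp, PBIND_HARD, &obind, &berr);
 *
 * TB_CPU_IS_HARD() is then how we tell the two apart here.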
2803 */ 2804 if (!unbind_all_threads && TB_CPU_IS_HARD(tp)) 2805 continue; 2806 err = cpu_bind_thread(tp, PBIND_NONE, &obind, &berr); 2807 if (ret == 0) 2808 ret = err; 2809 } while ((tp = tp->t_forw) != pp->p_tlist); 2810 mutex_exit(&pp->p_lock); 2811 } 2812 mutex_exit(&pidlock); 2813 if (ret == 0) 2814 ret = berr; 2815 return (ret); 2816 } 2817 2818 2819 /* 2820 * Destroy all remaining bound threads on a cpu. 2821 */ 2822 void 2823 cpu_destroy_bound_threads(cpu_t *cp) 2824 { 2825 extern id_t syscid; 2826 register kthread_id_t t, tlist, tnext; 2827 2828 /* 2829 * Destroy all remaining bound threads on the cpu. This 2830 * should include both the interrupt threads and the idle thread. 2831 * This requires some care, since we need to traverse the 2832 * thread list with the pidlock mutex locked, but thread_free 2833 * also locks the pidlock mutex. So, we collect the threads 2834 * we're going to reap in a list headed by "tlist", then we 2835 * unlock the pidlock mutex and traverse the tlist list, 2836 * doing thread_free's on the thread's. Simple, n'est pas? 2837 * Also, this depends on thread_free not mucking with the 2838 * t_next and t_prev links of the thread. 2839 */ 2840 2841 if ((t = curthread) != NULL) { 2842 2843 tlist = NULL; 2844 mutex_enter(&pidlock); 2845 do { 2846 tnext = t->t_next; 2847 if (t->t_bound_cpu == cp) { 2848 2849 /* 2850 * We've found a bound thread, carefully unlink 2851 * it out of the thread list, and add it to 2852 * our "tlist". We "know" we don't have to 2853 * worry about unlinking curthread (the thread 2854 * that is executing this code). 2855 */ 2856 t->t_next->t_prev = t->t_prev; 2857 t->t_prev->t_next = t->t_next; 2858 t->t_next = tlist; 2859 tlist = t; 2860 ASSERT(t->t_cid == syscid); 2861 /* wake up anyone blocked in thread_join */ 2862 cv_broadcast(&t->t_joincv); 2863 /* 2864 * t_lwp set by interrupt threads and not 2865 * cleared. 2866 */ 2867 t->t_lwp = NULL; 2868 /* 2869 * Pause and idle threads always have 2870 * t_state set to TS_ONPROC. 2871 */ 2872 t->t_state = TS_FREE; 2873 t->t_prev = NULL; /* Just in case */ 2874 } 2875 2876 } while ((t = tnext) != curthread); 2877 2878 mutex_exit(&pidlock); 2879 2880 mutex_sync(); 2881 for (t = tlist; t != NULL; t = tnext) { 2882 tnext = t->t_next; 2883 thread_free(t); 2884 } 2885 } 2886 } 2887 2888 /* 2889 * Update the cpu_supp_freqs of this cpu. This information is returned 2890 * as part of cpu_info kstats. If the cpu_info_kstat exists already, then 2891 * maintain the kstat data size. 2892 */ 2893 void 2894 cpu_set_supp_freqs(cpu_t *cp, const char *freqs) 2895 { 2896 char clkstr[sizeof ("18446744073709551615") + 1]; /* ui64 MAX */ 2897 const char *lfreqs = clkstr; 2898 boolean_t kstat_exists = B_FALSE; 2899 kstat_t *ksp; 2900 size_t len; 2901 2902 /* 2903 * A NULL pointer means we only support one speed. 2904 */ 2905 if (freqs == NULL) 2906 (void) snprintf(clkstr, sizeof (clkstr), "%"PRIu64, 2907 cp->cpu_curr_clock); 2908 else 2909 lfreqs = freqs; 2910 2911 /* 2912 * Make sure the frequency doesn't change while a snapshot is 2913 * going on. Of course, we only need to worry about this if 2914 * the kstat exists. 2915 */ 2916 if ((ksp = cp->cpu_info_kstat) != NULL) { 2917 mutex_enter(ksp->ks_lock); 2918 kstat_exists = B_TRUE; 2919 } 2920 2921 /* 2922 * Free any previously allocated string and if the kstat 2923 * already exists, then update its data size. 
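 *
 * Together with the matching addition once the new string is installed,
 * the net change to ks_data_size is just the difference between the new
 * and old string lengths. For reference, a platform with one fixed
 * speed can simply pass a NULL list (a minimal sketch), and the code
 * above falls back to formatting cpu_curr_clock itself:
 *
 *	cpu_set_supp_freqs(cp, NULL);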
2924 */ 2925 if (cp->cpu_supp_freqs != NULL) { 2926 len = strlen(cp->cpu_supp_freqs) + 1; 2927 kmem_free(cp->cpu_supp_freqs, len); 2928 if (kstat_exists) 2929 ksp->ks_data_size -= len; 2930 } 2931 2932 /* 2933 * Allocate the new string and set the pointer. 2934 */ 2935 len = strlen(lfreqs) + 1; 2936 cp->cpu_supp_freqs = kmem_alloc(len, KM_SLEEP); 2937 (void) strcpy(cp->cpu_supp_freqs, lfreqs); 2938 2939 /* 2940 * If the kstat already exists then update the data size and 2941 * free the lock. 2942 */ 2943 if (kstat_exists) { 2944 ksp->ks_data_size += len; 2945 mutex_exit(ksp->ks_lock); 2946 } 2947 } 2948 2949 /* 2950 * processor_info(2) and p_online(2) status support functions 2951 * The constants returned by the cpu_get_state() and cpu_get_state_str() are 2952 * for use in communicating processor state information to userland. Kernel 2953 * subsystems should only be using the cpu_flags value directly. Subsystems 2954 * modifying cpu_flags should record the state change via a call to the 2955 * cpu_set_state(). 2956 */ 2957 2958 /* 2959 * Update the pi_state of this CPU. This function provides the CPU status for 2960 * the information returned by processor_info(2). 2961 */ 2962 void 2963 cpu_set_state(cpu_t *cpu) 2964 { 2965 ASSERT(MUTEX_HELD(&cpu_lock)); 2966 cpu->cpu_type_info.pi_state = cpu_get_state(cpu); 2967 cpu->cpu_state_begin = gethrestime_sec(); 2968 pool_cpu_mod = gethrtime(); 2969 } 2970 2971 /* 2972 * Return offline/online/other status for the indicated CPU. Use only for 2973 * communication with user applications; cpu_flags provides the in-kernel 2974 * interface. 2975 */ 2976 int 2977 cpu_get_state(cpu_t *cpu) 2978 { 2979 ASSERT(MUTEX_HELD(&cpu_lock)); 2980 if (cpu->cpu_flags & CPU_POWEROFF) 2981 return (P_POWEROFF); 2982 else if (cpu->cpu_flags & CPU_FAULTED) 2983 return (P_FAULTED); 2984 else if (cpu->cpu_flags & CPU_SPARE) 2985 return (P_SPARE); 2986 else if ((cpu->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY) 2987 return (P_OFFLINE); 2988 else if (cpu->cpu_flags & CPU_ENABLE) 2989 return (P_ONLINE); 2990 else 2991 return (P_NOINTR); 2992 } 2993 2994 /* 2995 * Return processor_info(2) state as a string. 2996 */ 2997 const char * 2998 cpu_get_state_str(cpu_t *cpu) 2999 { 3000 const char *string; 3001 3002 switch (cpu_get_state(cpu)) { 3003 case P_ONLINE: 3004 string = PS_ONLINE; 3005 break; 3006 case P_POWEROFF: 3007 string = PS_POWEROFF; 3008 break; 3009 case P_NOINTR: 3010 string = PS_NOINTR; 3011 break; 3012 case P_SPARE: 3013 string = PS_SPARE; 3014 break; 3015 case P_FAULTED: 3016 string = PS_FAULTED; 3017 break; 3018 case P_OFFLINE: 3019 string = PS_OFFLINE; 3020 break; 3021 default: 3022 string = "unknown"; 3023 break; 3024 } 3025 return (string); 3026 } 3027 3028 /* 3029 * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named 3030 * kstats, respectively. This is done when a CPU is initialized or placed 3031 * online via p_online(2). 
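 *
 * The named kstats created here appear as cpu:<instance>:sys and
 * cpu:<instance>:vm, next to the legacy cpu_stat:<instance> raw kstat,
 * and can be read from userland through libkstat(3LIB). An illustrative
 * fragment, with error handling omitted:
 *
 *	kstat_ctl_t *kc = kstat_open();
 *	kstat_t *ksp = kstat_lookup(kc, "cpu", 0, "sys");
 *	kstat_named_t *kn;
 *
 *	(void) kstat_read(kc, ksp, NULL);
 *	kn = kstat_data_lookup(ksp, "syscall");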
3032 */ 3033 static void 3034 cpu_stats_kstat_create(cpu_t *cp) 3035 { 3036 int instance = cp->cpu_id; 3037 char *module = "cpu"; 3038 char *class = "misc"; 3039 kstat_t *ksp; 3040 zoneid_t zoneid; 3041 3042 ASSERT(MUTEX_HELD(&cpu_lock)); 3043 3044 if (pool_pset_enabled()) 3045 zoneid = GLOBAL_ZONEID; 3046 else 3047 zoneid = ALL_ZONES; 3048 /* 3049 * Create named kstats 3050 */ 3051 #define CPU_STATS_KS_CREATE(name, tsize, update_func) \ 3052 ksp = kstat_create_zone(module, instance, (name), class, \ 3053 KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0, \ 3054 zoneid); \ 3055 if (ksp != NULL) { \ 3056 ksp->ks_private = cp; \ 3057 ksp->ks_update = (update_func); \ 3058 kstat_install(ksp); \ 3059 } else \ 3060 cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat", \ 3061 module, instance, (name)); 3062 3063 CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template), 3064 cpu_sys_stats_ks_update); 3065 CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template), 3066 cpu_vm_stats_ks_update); 3067 3068 /* 3069 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat. 3070 */ 3071 ksp = kstat_create_zone("cpu_stat", cp->cpu_id, NULL, 3072 "misc", KSTAT_TYPE_RAW, sizeof (cpu_stat_t), 0, zoneid); 3073 if (ksp != NULL) { 3074 ksp->ks_update = cpu_stat_ks_update; 3075 ksp->ks_private = cp; 3076 kstat_install(ksp); 3077 } 3078 } 3079 3080 static void 3081 cpu_stats_kstat_destroy(cpu_t *cp) 3082 { 3083 char ks_name[KSTAT_STRLEN]; 3084 3085 (void) sprintf(ks_name, "cpu_stat%d", cp->cpu_id); 3086 kstat_delete_byname("cpu_stat", cp->cpu_id, ks_name); 3087 3088 kstat_delete_byname("cpu", cp->cpu_id, "sys"); 3089 kstat_delete_byname("cpu", cp->cpu_id, "vm"); 3090 } 3091 3092 static int 3093 cpu_sys_stats_ks_update(kstat_t *ksp, int rw) 3094 { 3095 cpu_t *cp = (cpu_t *)ksp->ks_private; 3096 struct cpu_sys_stats_ks_data *csskd; 3097 cpu_sys_stats_t *css; 3098 hrtime_t msnsecs[NCMSTATES]; 3099 int i; 3100 3101 if (rw == KSTAT_WRITE) 3102 return (EACCES); 3103 3104 csskd = ksp->ks_data; 3105 css = &cp->cpu_stats.sys; 3106 3107 /* 3108 * Read CPU mstate, but compare with the last values we 3109 * received to make sure that the returned kstats never 3110 * decrease. 
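 * In other words, each state is exported as the maximum of the fresh
 * get_cpu_mstate() reading and the value reported last time, so a
 * consumer computing rates from successive snapshots never sees a
 * cumulative counter move backwards.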
3111 */ 3112 3113 get_cpu_mstate(cp, msnsecs); 3114 if (csskd->cpu_nsec_idle.value.ui64 > msnsecs[CMS_IDLE]) 3115 msnsecs[CMS_IDLE] = csskd->cpu_nsec_idle.value.ui64; 3116 if (csskd->cpu_nsec_user.value.ui64 > msnsecs[CMS_USER]) 3117 msnsecs[CMS_USER] = csskd->cpu_nsec_user.value.ui64; 3118 if (csskd->cpu_nsec_kernel.value.ui64 > msnsecs[CMS_SYSTEM]) 3119 msnsecs[CMS_SYSTEM] = csskd->cpu_nsec_kernel.value.ui64; 3120 3121 bcopy(&cpu_sys_stats_ks_data_template, ksp->ks_data, 3122 sizeof (cpu_sys_stats_ks_data_template)); 3123 3124 csskd->cpu_ticks_wait.value.ui64 = 0; 3125 csskd->wait_ticks_io.value.ui64 = 0; 3126 3127 csskd->cpu_nsec_idle.value.ui64 = msnsecs[CMS_IDLE]; 3128 csskd->cpu_nsec_user.value.ui64 = msnsecs[CMS_USER]; 3129 csskd->cpu_nsec_kernel.value.ui64 = msnsecs[CMS_SYSTEM]; 3130 csskd->cpu_ticks_idle.value.ui64 = 3131 NSEC_TO_TICK(csskd->cpu_nsec_idle.value.ui64); 3132 csskd->cpu_ticks_user.value.ui64 = 3133 NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64); 3134 csskd->cpu_ticks_kernel.value.ui64 = 3135 NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64); 3136 csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast; 3137 csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload; 3138 csskd->bread.value.ui64 = css->bread; 3139 csskd->bwrite.value.ui64 = css->bwrite; 3140 csskd->lread.value.ui64 = css->lread; 3141 csskd->lwrite.value.ui64 = css->lwrite; 3142 csskd->phread.value.ui64 = css->phread; 3143 csskd->phwrite.value.ui64 = css->phwrite; 3144 csskd->pswitch.value.ui64 = css->pswitch; 3145 csskd->trap.value.ui64 = css->trap; 3146 csskd->intr.value.ui64 = 0; 3147 for (i = 0; i < PIL_MAX; i++) 3148 csskd->intr.value.ui64 += css->intr[i]; 3149 csskd->syscall.value.ui64 = css->syscall; 3150 csskd->sysread.value.ui64 = css->sysread; 3151 csskd->syswrite.value.ui64 = css->syswrite; 3152 csskd->sysfork.value.ui64 = css->sysfork; 3153 csskd->sysvfork.value.ui64 = css->sysvfork; 3154 csskd->sysexec.value.ui64 = css->sysexec; 3155 csskd->readch.value.ui64 = css->readch; 3156 csskd->writech.value.ui64 = css->writech; 3157 csskd->rcvint.value.ui64 = css->rcvint; 3158 csskd->xmtint.value.ui64 = css->xmtint; 3159 csskd->mdmint.value.ui64 = css->mdmint; 3160 csskd->rawch.value.ui64 = css->rawch; 3161 csskd->canch.value.ui64 = css->canch; 3162 csskd->outch.value.ui64 = css->outch; 3163 csskd->msg.value.ui64 = css->msg; 3164 csskd->sema.value.ui64 = css->sema; 3165 csskd->namei.value.ui64 = css->namei; 3166 csskd->ufsiget.value.ui64 = css->ufsiget; 3167 csskd->ufsdirblk.value.ui64 = css->ufsdirblk; 3168 csskd->ufsipage.value.ui64 = css->ufsipage; 3169 csskd->ufsinopage.value.ui64 = css->ufsinopage; 3170 csskd->procovf.value.ui64 = css->procovf; 3171 csskd->intrthread.value.ui64 = 0; 3172 for (i = 0; i < LOCK_LEVEL - 1; i++) 3173 csskd->intrthread.value.ui64 += css->intr[i]; 3174 csskd->intrblk.value.ui64 = css->intrblk; 3175 csskd->intrunpin.value.ui64 = css->intrunpin; 3176 csskd->idlethread.value.ui64 = css->idlethread; 3177 csskd->inv_swtch.value.ui64 = css->inv_swtch; 3178 csskd->nthreads.value.ui64 = css->nthreads; 3179 csskd->cpumigrate.value.ui64 = css->cpumigrate; 3180 csskd->xcalls.value.ui64 = css->xcalls; 3181 csskd->mutex_adenters.value.ui64 = css->mutex_adenters; 3182 csskd->rw_rdfails.value.ui64 = css->rw_rdfails; 3183 csskd->rw_wrfails.value.ui64 = css->rw_wrfails; 3184 csskd->modload.value.ui64 = css->modload; 3185 csskd->modunload.value.ui64 = css->modunload; 3186 csskd->bawrite.value.ui64 = css->bawrite; 3187 csskd->iowait.value.ui64 = css->iowait; 3188 3189 return (0); 3190 } 3191 3192 
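/*
 * Update routine for the named cpu:<instance>:vm kstat created by
 * cpu_stats_kstat_create(); it copies in the template and then
 * snapshots the per-CPU cpu_stats.vm counters.
 */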
static int 3193 cpu_vm_stats_ks_update(kstat_t *ksp, int rw) 3194 { 3195 cpu_t *cp = (cpu_t *)ksp->ks_private; 3196 struct cpu_vm_stats_ks_data *cvskd; 3197 cpu_vm_stats_t *cvs; 3198 3199 if (rw == KSTAT_WRITE) 3200 return (EACCES); 3201 3202 cvs = &cp->cpu_stats.vm; 3203 cvskd = ksp->ks_data; 3204 3205 bcopy(&cpu_vm_stats_ks_data_template, ksp->ks_data, 3206 sizeof (cpu_vm_stats_ks_data_template)); 3207 cvskd->pgrec.value.ui64 = cvs->pgrec; 3208 cvskd->pgfrec.value.ui64 = cvs->pgfrec; 3209 cvskd->pgin.value.ui64 = cvs->pgin; 3210 cvskd->pgpgin.value.ui64 = cvs->pgpgin; 3211 cvskd->pgout.value.ui64 = cvs->pgout; 3212 cvskd->pgpgout.value.ui64 = cvs->pgpgout; 3213 cvskd->swapin.value.ui64 = cvs->swapin; 3214 cvskd->pgswapin.value.ui64 = cvs->pgswapin; 3215 cvskd->swapout.value.ui64 = cvs->swapout; 3216 cvskd->pgswapout.value.ui64 = cvs->pgswapout; 3217 cvskd->zfod.value.ui64 = cvs->zfod; 3218 cvskd->dfree.value.ui64 = cvs->dfree; 3219 cvskd->scan.value.ui64 = cvs->scan; 3220 cvskd->rev.value.ui64 = cvs->rev; 3221 cvskd->hat_fault.value.ui64 = cvs->hat_fault; 3222 cvskd->as_fault.value.ui64 = cvs->as_fault; 3223 cvskd->maj_fault.value.ui64 = cvs->maj_fault; 3224 cvskd->cow_fault.value.ui64 = cvs->cow_fault; 3225 cvskd->prot_fault.value.ui64 = cvs->prot_fault; 3226 cvskd->softlock.value.ui64 = cvs->softlock; 3227 cvskd->kernel_asflt.value.ui64 = cvs->kernel_asflt; 3228 cvskd->pgrrun.value.ui64 = cvs->pgrrun; 3229 cvskd->execpgin.value.ui64 = cvs->execpgin; 3230 cvskd->execpgout.value.ui64 = cvs->execpgout; 3231 cvskd->execfree.value.ui64 = cvs->execfree; 3232 cvskd->anonpgin.value.ui64 = cvs->anonpgin; 3233 cvskd->anonpgout.value.ui64 = cvs->anonpgout; 3234 cvskd->anonfree.value.ui64 = cvs->anonfree; 3235 cvskd->fspgin.value.ui64 = cvs->fspgin; 3236 cvskd->fspgout.value.ui64 = cvs->fspgout; 3237 cvskd->fsfree.value.ui64 = cvs->fsfree; 3238 3239 return (0); 3240 } 3241 3242 static int 3243 cpu_stat_ks_update(kstat_t *ksp, int rw) 3244 { 3245 cpu_stat_t *cso; 3246 cpu_t *cp; 3247 int i; 3248 hrtime_t msnsecs[NCMSTATES]; 3249 3250 cso = (cpu_stat_t *)ksp->ks_data; 3251 cp = (cpu_t *)ksp->ks_private; 3252 3253 if (rw == KSTAT_WRITE) 3254 return (EACCES); 3255 3256 /* 3257 * Read CPU mstate, but compare with the last values we 3258 * received to make sure that the returned kstats never 3259 * decrease. 
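 * Unlike the named "sys" kstat above, the legacy cpu_stat_t reports
 * these times in clock ticks, so the freshly read values are first
 * converted with NSEC_TO_TICK() and then clamped against the
 * previously exported counters in the same way.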
3260 */ 3261 3262 get_cpu_mstate(cp, msnsecs); 3263 msnsecs[CMS_IDLE] = NSEC_TO_TICK(msnsecs[CMS_IDLE]); 3264 msnsecs[CMS_USER] = NSEC_TO_TICK(msnsecs[CMS_USER]); 3265 msnsecs[CMS_SYSTEM] = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); 3266 if (cso->cpu_sysinfo.cpu[CPU_IDLE] < msnsecs[CMS_IDLE]) 3267 cso->cpu_sysinfo.cpu[CPU_IDLE] = msnsecs[CMS_IDLE]; 3268 if (cso->cpu_sysinfo.cpu[CPU_USER] < msnsecs[CMS_USER]) 3269 cso->cpu_sysinfo.cpu[CPU_USER] = msnsecs[CMS_USER]; 3270 if (cso->cpu_sysinfo.cpu[CPU_KERNEL] < msnsecs[CMS_SYSTEM]) 3271 cso->cpu_sysinfo.cpu[CPU_KERNEL] = msnsecs[CMS_SYSTEM]; 3272 cso->cpu_sysinfo.cpu[CPU_WAIT] = 0; 3273 cso->cpu_sysinfo.wait[W_IO] = 0; 3274 cso->cpu_sysinfo.wait[W_SWAP] = 0; 3275 cso->cpu_sysinfo.wait[W_PIO] = 0; 3276 cso->cpu_sysinfo.bread = CPU_STATS(cp, sys.bread); 3277 cso->cpu_sysinfo.bwrite = CPU_STATS(cp, sys.bwrite); 3278 cso->cpu_sysinfo.lread = CPU_STATS(cp, sys.lread); 3279 cso->cpu_sysinfo.lwrite = CPU_STATS(cp, sys.lwrite); 3280 cso->cpu_sysinfo.phread = CPU_STATS(cp, sys.phread); 3281 cso->cpu_sysinfo.phwrite = CPU_STATS(cp, sys.phwrite); 3282 cso->cpu_sysinfo.pswitch = CPU_STATS(cp, sys.pswitch); 3283 cso->cpu_sysinfo.trap = CPU_STATS(cp, sys.trap); 3284 cso->cpu_sysinfo.intr = 0; 3285 for (i = 0; i < PIL_MAX; i++) 3286 cso->cpu_sysinfo.intr += CPU_STATS(cp, sys.intr[i]); 3287 cso->cpu_sysinfo.syscall = CPU_STATS(cp, sys.syscall); 3288 cso->cpu_sysinfo.sysread = CPU_STATS(cp, sys.sysread); 3289 cso->cpu_sysinfo.syswrite = CPU_STATS(cp, sys.syswrite); 3290 cso->cpu_sysinfo.sysfork = CPU_STATS(cp, sys.sysfork); 3291 cso->cpu_sysinfo.sysvfork = CPU_STATS(cp, sys.sysvfork); 3292 cso->cpu_sysinfo.sysexec = CPU_STATS(cp, sys.sysexec); 3293 cso->cpu_sysinfo.readch = CPU_STATS(cp, sys.readch); 3294 cso->cpu_sysinfo.writech = CPU_STATS(cp, sys.writech); 3295 cso->cpu_sysinfo.rcvint = CPU_STATS(cp, sys.rcvint); 3296 cso->cpu_sysinfo.xmtint = CPU_STATS(cp, sys.xmtint); 3297 cso->cpu_sysinfo.mdmint = CPU_STATS(cp, sys.mdmint); 3298 cso->cpu_sysinfo.rawch = CPU_STATS(cp, sys.rawch); 3299 cso->cpu_sysinfo.canch = CPU_STATS(cp, sys.canch); 3300 cso->cpu_sysinfo.outch = CPU_STATS(cp, sys.outch); 3301 cso->cpu_sysinfo.msg = CPU_STATS(cp, sys.msg); 3302 cso->cpu_sysinfo.sema = CPU_STATS(cp, sys.sema); 3303 cso->cpu_sysinfo.namei = CPU_STATS(cp, sys.namei); 3304 cso->cpu_sysinfo.ufsiget = CPU_STATS(cp, sys.ufsiget); 3305 cso->cpu_sysinfo.ufsdirblk = CPU_STATS(cp, sys.ufsdirblk); 3306 cso->cpu_sysinfo.ufsipage = CPU_STATS(cp, sys.ufsipage); 3307 cso->cpu_sysinfo.ufsinopage = CPU_STATS(cp, sys.ufsinopage); 3308 cso->cpu_sysinfo.inodeovf = 0; 3309 cso->cpu_sysinfo.fileovf = 0; 3310 cso->cpu_sysinfo.procovf = CPU_STATS(cp, sys.procovf); 3311 cso->cpu_sysinfo.intrthread = 0; 3312 for (i = 0; i < LOCK_LEVEL - 1; i++) 3313 cso->cpu_sysinfo.intrthread += CPU_STATS(cp, sys.intr[i]); 3314 cso->cpu_sysinfo.intrblk = CPU_STATS(cp, sys.intrblk); 3315 cso->cpu_sysinfo.idlethread = CPU_STATS(cp, sys.idlethread); 3316 cso->cpu_sysinfo.inv_swtch = CPU_STATS(cp, sys.inv_swtch); 3317 cso->cpu_sysinfo.nthreads = CPU_STATS(cp, sys.nthreads); 3318 cso->cpu_sysinfo.cpumigrate = CPU_STATS(cp, sys.cpumigrate); 3319 cso->cpu_sysinfo.xcalls = CPU_STATS(cp, sys.xcalls); 3320 cso->cpu_sysinfo.mutex_adenters = CPU_STATS(cp, sys.mutex_adenters); 3321 cso->cpu_sysinfo.rw_rdfails = CPU_STATS(cp, sys.rw_rdfails); 3322 cso->cpu_sysinfo.rw_wrfails = CPU_STATS(cp, sys.rw_wrfails); 3323 cso->cpu_sysinfo.modload = CPU_STATS(cp, sys.modload); 3324 cso->cpu_sysinfo.modunload = CPU_STATS(cp, sys.modunload); 3325 
cso->cpu_sysinfo.bawrite = CPU_STATS(cp, sys.bawrite); 3326 cso->cpu_sysinfo.rw_enters = 0; 3327 cso->cpu_sysinfo.win_uo_cnt = 0; 3328 cso->cpu_sysinfo.win_uu_cnt = 0; 3329 cso->cpu_sysinfo.win_so_cnt = 0; 3330 cso->cpu_sysinfo.win_su_cnt = 0; 3331 cso->cpu_sysinfo.win_suo_cnt = 0; 3332 3333 cso->cpu_syswait.iowait = CPU_STATS(cp, sys.iowait); 3334 cso->cpu_syswait.swap = 0; 3335 cso->cpu_syswait.physio = 0; 3336 3337 cso->cpu_vminfo.pgrec = CPU_STATS(cp, vm.pgrec); 3338 cso->cpu_vminfo.pgfrec = CPU_STATS(cp, vm.pgfrec); 3339 cso->cpu_vminfo.pgin = CPU_STATS(cp, vm.pgin); 3340 cso->cpu_vminfo.pgpgin = CPU_STATS(cp, vm.pgpgin); 3341 cso->cpu_vminfo.pgout = CPU_STATS(cp, vm.pgout); 3342 cso->cpu_vminfo.pgpgout = CPU_STATS(cp, vm.pgpgout); 3343 cso->cpu_vminfo.swapin = CPU_STATS(cp, vm.swapin); 3344 cso->cpu_vminfo.pgswapin = CPU_STATS(cp, vm.pgswapin); 3345 cso->cpu_vminfo.swapout = CPU_STATS(cp, vm.swapout); 3346 cso->cpu_vminfo.pgswapout = CPU_STATS(cp, vm.pgswapout); 3347 cso->cpu_vminfo.zfod = CPU_STATS(cp, vm.zfod); 3348 cso->cpu_vminfo.dfree = CPU_STATS(cp, vm.dfree); 3349 cso->cpu_vminfo.scan = CPU_STATS(cp, vm.scan); 3350 cso->cpu_vminfo.rev = CPU_STATS(cp, vm.rev); 3351 cso->cpu_vminfo.hat_fault = CPU_STATS(cp, vm.hat_fault); 3352 cso->cpu_vminfo.as_fault = CPU_STATS(cp, vm.as_fault); 3353 cso->cpu_vminfo.maj_fault = CPU_STATS(cp, vm.maj_fault); 3354 cso->cpu_vminfo.cow_fault = CPU_STATS(cp, vm.cow_fault); 3355 cso->cpu_vminfo.prot_fault = CPU_STATS(cp, vm.prot_fault); 3356 cso->cpu_vminfo.softlock = CPU_STATS(cp, vm.softlock); 3357 cso->cpu_vminfo.kernel_asflt = CPU_STATS(cp, vm.kernel_asflt); 3358 cso->cpu_vminfo.pgrrun = CPU_STATS(cp, vm.pgrrun); 3359 cso->cpu_vminfo.execpgin = CPU_STATS(cp, vm.execpgin); 3360 cso->cpu_vminfo.execpgout = CPU_STATS(cp, vm.execpgout); 3361 cso->cpu_vminfo.execfree = CPU_STATS(cp, vm.execfree); 3362 cso->cpu_vminfo.anonpgin = CPU_STATS(cp, vm.anonpgin); 3363 cso->cpu_vminfo.anonpgout = CPU_STATS(cp, vm.anonpgout); 3364 cso->cpu_vminfo.anonfree = CPU_STATS(cp, vm.anonfree); 3365 cso->cpu_vminfo.fspgin = CPU_STATS(cp, vm.fspgin); 3366 cso->cpu_vminfo.fspgout = CPU_STATS(cp, vm.fspgout); 3367 cso->cpu_vminfo.fsfree = CPU_STATS(cp, vm.fsfree); 3368 3369 return (0); 3370 } 3371
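/*
 * For reference, the state computed by cpu_get_state() above is what
 * userland ultimately sees through processor_info(2) and p_online(2).
 * A minimal consumer of processor_info(2) might look like the hedged
 * sketch below (kept inside this comment since it is userland code;
 * error handling omitted):
 *
 *	#include <sys/types.h>
 *	#include <sys/processor.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		processor_info_t pi;
 *
 *		if (processor_info(0, &pi) == 0)
 *			(void) printf("cpu0: state %d, %s, %d MHz\n",
 *			    pi.pi_state, pi.pi_processor_type, pi.pi_clock);
 *		return (0);
 *	}
 */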