1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Architecture-independent CPU control functions. 
28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/var.h> 33 #include <sys/thread.h> 34 #include <sys/cpuvar.h> 35 #include <sys/kstat.h> 36 #include <sys/uadmin.h> 37 #include <sys/systm.h> 38 #include <sys/errno.h> 39 #include <sys/cmn_err.h> 40 #include <sys/procset.h> 41 #include <sys/processor.h> 42 #include <sys/debug.h> 43 #include <sys/cpupart.h> 44 #include <sys/lgrp.h> 45 #include <sys/pset.h> 46 #include <sys/pghw.h> 47 #include <sys/kmem.h> 48 #include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */ 49 #include <sys/atomic.h> 50 #include <sys/callb.h> 51 #include <sys/vtrace.h> 52 #include <sys/cyclic.h> 53 #include <sys/bitmap.h> 54 #include <sys/nvpair.h> 55 #include <sys/pool_pset.h> 56 #include <sys/msacct.h> 57 #include <sys/time.h> 58 #include <sys/archsystm.h> 59 #include <sys/sdt.h> 60 #if defined(__x86) || defined(__amd64) 61 #include <sys/x86_archext.h> 62 #endif 63 #include <sys/callo.h> 64 65 extern int mp_cpu_start(cpu_t *); 66 extern int mp_cpu_stop(cpu_t *); 67 extern int mp_cpu_poweron(cpu_t *); 68 extern int mp_cpu_poweroff(cpu_t *); 69 extern int mp_cpu_configure(int); 70 extern int mp_cpu_unconfigure(int); 71 extern void mp_cpu_faulted_enter(cpu_t *); 72 extern void mp_cpu_faulted_exit(cpu_t *); 73 74 extern int cmp_cpu_to_chip(processorid_t cpuid); 75 #ifdef __sparcv9 76 extern char *cpu_fru_fmri(cpu_t *cp); 77 #endif 78 79 static void cpu_add_active_internal(cpu_t *cp); 80 static void cpu_remove_active(cpu_t *cp); 81 static void cpu_info_kstat_create(cpu_t *cp); 82 static void cpu_info_kstat_destroy(cpu_t *cp); 83 static void cpu_stats_kstat_create(cpu_t *cp); 84 static void cpu_stats_kstat_destroy(cpu_t *cp); 85 86 static int cpu_sys_stats_ks_update(kstat_t *ksp, int rw); 87 static int cpu_vm_stats_ks_update(kstat_t *ksp, int rw); 88 static int cpu_stat_ks_update(kstat_t *ksp, int rw); 89 static int cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t); 90 91 /* 92 * cpu_lock protects ncpus, 
ncpus_online, cpu_flag, cpu_list, cpu_active, 93 * and dispatch queue reallocations. The lock ordering with respect to 94 * related locks is: 95 * 96 * cpu_lock --> thread_free_lock ---> p_lock ---> thread_lock() 97 * 98 * Warning: Certain sections of code do not use the cpu_lock when 99 * traversing the cpu_list (e.g. mutex_vector_enter(), clock()). Since 100 * all cpus are paused during modifications to this list, a solution 101 * to protect the list is too either disable kernel preemption while 102 * walking the list, *or* recheck the cpu_next pointer at each 103 * iteration in the loop. Note that in no cases can any cached 104 * copies of the cpu pointers be kept as they may become invalid. 105 */ 106 kmutex_t cpu_lock; 107 cpu_t *cpu_list; /* list of all CPUs */ 108 cpu_t *clock_cpu_list; /* used by clock to walk CPUs */ 109 cpu_t *cpu_active; /* list of active CPUs */ 110 static cpuset_t cpu_available; /* set of available CPUs */ 111 cpuset_t cpu_seqid_inuse; /* which cpu_seqids are in use */ 112 113 cpu_t **cpu_seq; /* ptrs to CPUs, indexed by seq_id */ 114 115 /* 116 * max_ncpus keeps the max cpus the system can have. Initially 117 * it's NCPU, but since most archs scan the devtree for cpus 118 * fairly early on during boot, the real max can be known before 119 * ncpus is set (useful for early NCPU based allocations). 120 */ 121 int max_ncpus = NCPU; 122 /* 123 * platforms that set max_ncpus to maxiumum number of cpus that can be 124 * dynamically added will set boot_max_ncpus to the number of cpus found 125 * at device tree scan time during boot. 126 */ 127 int boot_max_ncpus = -1; 128 int boot_ncpus = -1; 129 /* 130 * Maximum possible CPU id. This can never be >= NCPU since NCPU is 131 * used to size arrays that are indexed by CPU id. 132 */ 133 processorid_t max_cpuid = NCPU - 1; 134 135 int ncpus = 1; 136 int ncpus_online = 1; 137 138 /* 139 * CPU that we're trying to offline. Protected by cpu_lock. 
140 */ 141 cpu_t *cpu_inmotion; 142 143 /* 144 * Can be raised to suppress further weakbinding, which are instead 145 * satisfied by disabling preemption. Must be raised/lowered under cpu_lock, 146 * while individual thread weakbinding synchronisation is done under thread 147 * lock. 148 */ 149 int weakbindingbarrier; 150 151 /* 152 * Variables used in pause_cpus(). 153 */ 154 static volatile char safe_list[NCPU]; 155 156 static struct _cpu_pause_info { 157 int cp_spl; /* spl saved in pause_cpus() */ 158 volatile int cp_go; /* Go signal sent after all ready */ 159 int cp_count; /* # of CPUs to pause */ 160 ksema_t cp_sem; /* synch pause_cpus & cpu_pause */ 161 kthread_id_t cp_paused; 162 } cpu_pause_info; 163 164 static kmutex_t pause_free_mutex; 165 static kcondvar_t pause_free_cv; 166 167 void *(*cpu_pause_func)(void *) = NULL; 168 169 170 static struct cpu_sys_stats_ks_data { 171 kstat_named_t cpu_ticks_idle; 172 kstat_named_t cpu_ticks_user; 173 kstat_named_t cpu_ticks_kernel; 174 kstat_named_t cpu_ticks_wait; 175 kstat_named_t cpu_nsec_idle; 176 kstat_named_t cpu_nsec_user; 177 kstat_named_t cpu_nsec_kernel; 178 kstat_named_t cpu_nsec_intr; 179 kstat_named_t cpu_load_intr; 180 kstat_named_t wait_ticks_io; 181 kstat_named_t bread; 182 kstat_named_t bwrite; 183 kstat_named_t lread; 184 kstat_named_t lwrite; 185 kstat_named_t phread; 186 kstat_named_t phwrite; 187 kstat_named_t pswitch; 188 kstat_named_t trap; 189 kstat_named_t intr; 190 kstat_named_t syscall; 191 kstat_named_t sysread; 192 kstat_named_t syswrite; 193 kstat_named_t sysfork; 194 kstat_named_t sysvfork; 195 kstat_named_t sysexec; 196 kstat_named_t readch; 197 kstat_named_t writech; 198 kstat_named_t rcvint; 199 kstat_named_t xmtint; 200 kstat_named_t mdmint; 201 kstat_named_t rawch; 202 kstat_named_t canch; 203 kstat_named_t outch; 204 kstat_named_t msg; 205 kstat_named_t sema; 206 kstat_named_t namei; 207 kstat_named_t ufsiget; 208 kstat_named_t ufsdirblk; 209 kstat_named_t ufsipage; 210 
kstat_named_t ufsinopage; 211 kstat_named_t procovf; 212 kstat_named_t intrthread; 213 kstat_named_t intrblk; 214 kstat_named_t intrunpin; 215 kstat_named_t idlethread; 216 kstat_named_t inv_swtch; 217 kstat_named_t nthreads; 218 kstat_named_t cpumigrate; 219 kstat_named_t xcalls; 220 kstat_named_t mutex_adenters; 221 kstat_named_t rw_rdfails; 222 kstat_named_t rw_wrfails; 223 kstat_named_t modload; 224 kstat_named_t modunload; 225 kstat_named_t bawrite; 226 kstat_named_t iowait; 227 } cpu_sys_stats_ks_data_template = { 228 { "cpu_ticks_idle", KSTAT_DATA_UINT64 }, 229 { "cpu_ticks_user", KSTAT_DATA_UINT64 }, 230 { "cpu_ticks_kernel", KSTAT_DATA_UINT64 }, 231 { "cpu_ticks_wait", KSTAT_DATA_UINT64 }, 232 { "cpu_nsec_idle", KSTAT_DATA_UINT64 }, 233 { "cpu_nsec_user", KSTAT_DATA_UINT64 }, 234 { "cpu_nsec_kernel", KSTAT_DATA_UINT64 }, 235 { "cpu_nsec_intr", KSTAT_DATA_UINT64 }, 236 { "cpu_load_intr", KSTAT_DATA_UINT64 }, 237 { "wait_ticks_io", KSTAT_DATA_UINT64 }, 238 { "bread", KSTAT_DATA_UINT64 }, 239 { "bwrite", KSTAT_DATA_UINT64 }, 240 { "lread", KSTAT_DATA_UINT64 }, 241 { "lwrite", KSTAT_DATA_UINT64 }, 242 { "phread", KSTAT_DATA_UINT64 }, 243 { "phwrite", KSTAT_DATA_UINT64 }, 244 { "pswitch", KSTAT_DATA_UINT64 }, 245 { "trap", KSTAT_DATA_UINT64 }, 246 { "intr", KSTAT_DATA_UINT64 }, 247 { "syscall", KSTAT_DATA_UINT64 }, 248 { "sysread", KSTAT_DATA_UINT64 }, 249 { "syswrite", KSTAT_DATA_UINT64 }, 250 { "sysfork", KSTAT_DATA_UINT64 }, 251 { "sysvfork", KSTAT_DATA_UINT64 }, 252 { "sysexec", KSTAT_DATA_UINT64 }, 253 { "readch", KSTAT_DATA_UINT64 }, 254 { "writech", KSTAT_DATA_UINT64 }, 255 { "rcvint", KSTAT_DATA_UINT64 }, 256 { "xmtint", KSTAT_DATA_UINT64 }, 257 { "mdmint", KSTAT_DATA_UINT64 }, 258 { "rawch", KSTAT_DATA_UINT64 }, 259 { "canch", KSTAT_DATA_UINT64 }, 260 { "outch", KSTAT_DATA_UINT64 }, 261 { "msg", KSTAT_DATA_UINT64 }, 262 { "sema", KSTAT_DATA_UINT64 }, 263 { "namei", KSTAT_DATA_UINT64 }, 264 { "ufsiget", KSTAT_DATA_UINT64 }, 265 { "ufsdirblk", 
KSTAT_DATA_UINT64 }, 266 { "ufsipage", KSTAT_DATA_UINT64 }, 267 { "ufsinopage", KSTAT_DATA_UINT64 }, 268 { "procovf", KSTAT_DATA_UINT64 }, 269 { "intrthread", KSTAT_DATA_UINT64 }, 270 { "intrblk", KSTAT_DATA_UINT64 }, 271 { "intrunpin", KSTAT_DATA_UINT64 }, 272 { "idlethread", KSTAT_DATA_UINT64 }, 273 { "inv_swtch", KSTAT_DATA_UINT64 }, 274 { "nthreads", KSTAT_DATA_UINT64 }, 275 { "cpumigrate", KSTAT_DATA_UINT64 }, 276 { "xcalls", KSTAT_DATA_UINT64 }, 277 { "mutex_adenters", KSTAT_DATA_UINT64 }, 278 { "rw_rdfails", KSTAT_DATA_UINT64 }, 279 { "rw_wrfails", KSTAT_DATA_UINT64 }, 280 { "modload", KSTAT_DATA_UINT64 }, 281 { "modunload", KSTAT_DATA_UINT64 }, 282 { "bawrite", KSTAT_DATA_UINT64 }, 283 { "iowait", KSTAT_DATA_UINT64 }, 284 }; 285 286 static struct cpu_vm_stats_ks_data { 287 kstat_named_t pgrec; 288 kstat_named_t pgfrec; 289 kstat_named_t pgin; 290 kstat_named_t pgpgin; 291 kstat_named_t pgout; 292 kstat_named_t pgpgout; 293 kstat_named_t swapin; 294 kstat_named_t pgswapin; 295 kstat_named_t swapout; 296 kstat_named_t pgswapout; 297 kstat_named_t zfod; 298 kstat_named_t dfree; 299 kstat_named_t scan; 300 kstat_named_t rev; 301 kstat_named_t hat_fault; 302 kstat_named_t as_fault; 303 kstat_named_t maj_fault; 304 kstat_named_t cow_fault; 305 kstat_named_t prot_fault; 306 kstat_named_t softlock; 307 kstat_named_t kernel_asflt; 308 kstat_named_t pgrrun; 309 kstat_named_t execpgin; 310 kstat_named_t execpgout; 311 kstat_named_t execfree; 312 kstat_named_t anonpgin; 313 kstat_named_t anonpgout; 314 kstat_named_t anonfree; 315 kstat_named_t fspgin; 316 kstat_named_t fspgout; 317 kstat_named_t fsfree; 318 } cpu_vm_stats_ks_data_template = { 319 { "pgrec", KSTAT_DATA_UINT64 }, 320 { "pgfrec", KSTAT_DATA_UINT64 }, 321 { "pgin", KSTAT_DATA_UINT64 }, 322 { "pgpgin", KSTAT_DATA_UINT64 }, 323 { "pgout", KSTAT_DATA_UINT64 }, 324 { "pgpgout", KSTAT_DATA_UINT64 }, 325 { "swapin", KSTAT_DATA_UINT64 }, 326 { "pgswapin", KSTAT_DATA_UINT64 }, 327 { "swapout", KSTAT_DATA_UINT64 }, 
	{ "pgswapout",		KSTAT_DATA_UINT64 },
	{ "zfod",		KSTAT_DATA_UINT64 },
	{ "dfree",		KSTAT_DATA_UINT64 },
	{ "scan",		KSTAT_DATA_UINT64 },
	{ "rev",		KSTAT_DATA_UINT64 },
	{ "hat_fault",		KSTAT_DATA_UINT64 },
	{ "as_fault",		KSTAT_DATA_UINT64 },
	{ "maj_fault",		KSTAT_DATA_UINT64 },
	{ "cow_fault",		KSTAT_DATA_UINT64 },
	{ "prot_fault",		KSTAT_DATA_UINT64 },
	{ "softlock",		KSTAT_DATA_UINT64 },
	{ "kernel_asflt",	KSTAT_DATA_UINT64 },
	{ "pgrrun",		KSTAT_DATA_UINT64 },
	{ "execpgin",		KSTAT_DATA_UINT64 },
	{ "execpgout",		KSTAT_DATA_UINT64 },
	{ "execfree",		KSTAT_DATA_UINT64 },
	{ "anonpgin",		KSTAT_DATA_UINT64 },
	{ "anonpgout",		KSTAT_DATA_UINT64 },
	{ "anonfree",		KSTAT_DATA_UINT64 },
	{ "fspgin",		KSTAT_DATA_UINT64 },
	{ "fspgout",		KSTAT_DATA_UINT64 },
	{ "fsfree",		KSTAT_DATA_UINT64 },
};

/*
 * Force the specified thread to migrate to the appropriate processor.
 * Called with thread lock held, returns with it dropped.
 *
 * The appropriate processor is determined by the dispatcher from the
 * thread's binding/affinity state when it is next placed on a run queue.
 */
static void
force_thread_migrate(kthread_id_t tp)
{
	ASSERT(THREAD_LOCK_HELD(tp));
	if (tp == curthread) {
		/*
		 * Migrating ourselves: mark the thread in transition,
		 * requeue it via its scheduling class, and switch off
		 * this CPU.  thread_unlock_nopreempt() avoids a
		 * preemption window before the explicit swtch().
		 */
		THREAD_TRANSITION(tp);
		CL_SETRUN(tp);
		thread_unlock_nopreempt(tp);
		swtch();
	} else {
		/*
		 * Migrating another thread: if it is on a CPU, ask that
		 * CPU to give it up; if it is runnable, pull it off its
		 * current dispatch queue and requeue it so the
		 * dispatcher re-evaluates placement.  Threads in other
		 * states will be placed correctly when they next run.
		 */
		if (tp->t_state == TS_ONPROC) {
			cpu_surrender(tp);
		} else if (tp->t_state == TS_RUN) {
			(void) dispdeq(tp);
			setbackdq(tp);
		}
		thread_unlock(tp);
	}
}

/*
 * Set affinity for a specified CPU.
 * A reference count is incremented and the affinity is held until the
 * reference count is decremented to zero by thread_affinity_clear().
 * This is so regions of code requiring affinity can be nested.
 * Caller needs to ensure that cpu_id remains valid, which can be
 * done by holding cpu_lock across this call, unless the caller
 * specifies CPU_CURRENT in which case the cpu_lock will be acquired
 * by thread_affinity_set and CPU->cpu_id will be the target CPU.
 */
void
thread_affinity_set(kthread_id_t t, int cpu_id)
{
	cpu_t		*cp;
	int		c;	/* caller's cpu_id, kept to detect CPU_CURRENT */

	/* A weakbound curthread must not also request hard affinity. */
	ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL));

	if ((c = cpu_id) == CPU_CURRENT) {
		/* Resolve "current CPU" under cpu_lock; released below. */
		mutex_enter(&cpu_lock);
		cpu_id = CPU->cpu_id;
	}
	/*
	 * We should be asserting that cpu_lock is held here, but
	 * the NCA code doesn't acquire it.  The following assert
	 * should be uncommented when the NCA code is fixed.
	 *
	 * ASSERT(MUTEX_HELD(&cpu_lock));
	 */
	ASSERT((cpu_id >= 0) && (cpu_id < NCPU));
	cp = cpu[cpu_id];
	ASSERT(cp != NULL);		/* user must provide a good cpu_id */
	/*
	 * If there is already a hard affinity requested, and this affinity
	 * conflicts with that, panic.
	 */
	thread_lock(t);
	if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) {
		panic("affinity_set: setting %p but already bound to %p",
		    (void *)cp, (void *)t->t_bound_cpu);
	}
	t->t_affinitycnt++;
	t->t_bound_cpu = cp;

	/*
	 * Make sure we're running on the right CPU.
	 */
	if (cp != t->t_cpu || t != curthread) {
		force_thread_migrate(t);	/* drops thread lock */
	} else {
		thread_unlock(t);
	}

	if (c == CPU_CURRENT)
		mutex_exit(&cpu_lock);
}

/*
 * Wrapper for backward compatibility.
 */
void
affinity_set(int cpu_id)
{
	thread_affinity_set(curthread, cpu_id);
}

/*
 * Decrement the affinity reservation count and if it becomes zero,
 * clear the CPU affinity for the current thread, or set it to the user's
 * software binding request.
 *
 * Returns with the thread lock dropped in all cases (either directly or
 * via force_thread_migrate()).
 */
void
thread_affinity_clear(kthread_id_t t)
{
	register processorid_t binding;

	thread_lock(t);
	if (--t->t_affinitycnt == 0) {
		if ((binding = t->t_bind_cpu) == PBIND_NONE) {
			/*
			 * Adjust disp_max_unbound_pri if necessary.
			 */
			disp_adjust_unbound_pri(t);
			t->t_bound_cpu = NULL;
			/*
			 * No user binding remains; if the thread wandered
			 * off its partition while affined, push it back.
			 */
			if (t->t_cpu->cpu_part != t->t_cpupart) {
				force_thread_migrate(t);
				return;
			}
		} else {
			/* Restore the user's software (pbind) binding. */
			t->t_bound_cpu = cpu[binding];
			/*
			 * Make sure the thread is running on the bound CPU.
			 */
			if (t->t_cpu != t->t_bound_cpu) {
				force_thread_migrate(t);
				return;		/* already dropped lock */
			}
		}
	}
	thread_unlock(t);
}

/*
 * Wrapper for backward compatibility.
 */
void
affinity_clear(void)
{
	thread_affinity_clear(curthread);
}

/*
 * Weak cpu affinity.  Bind to the "current" cpu for short periods
 * of time during which the thread must not block (but may be preempted).
 * Use this instead of kpreempt_disable() when it is only "no migration"
 * rather than "no preemption" semantics that are required - disabling
 * preemption holds higher priority threads off of cpu and if the
 * operation that is protected is more than momentary this is not good
 * for realtime etc.
 *
 * Weakly bound threads will not prevent a cpu from being offlined -
 * we'll only run them on the cpu to which they are weakly bound but
 * (because they do not block) we'll always be able to move them on to
 * another cpu at offline time if we give them just a short moment to
 * run during which they will unbind.  To give a cpu a chance of offlining,
 * however, we require a barrier to weak bindings that may be raised for a
 * given cpu (offline/move code may set this and then wait a short time for
 * existing weak bindings to drop); the cpu_inmotion pointer is that barrier.
 *
 * There are few restrictions on the calling context of thread_nomigrate.
 * The caller must not hold the thread lock.  Calls may be nested.
 *
 * After weakbinding a thread must not perform actions that may block.
 * In particular it must not call thread_affinity_set; calling that when
 * already weakbound is nonsensical anyway.
 *
 * If curthread is prevented from migrating for other reasons
 * (kernel preemption disabled; high pil; strongly bound; interrupt thread)
 * then the weak binding will succeed even if this cpu is the target of an
 * offline/move request.
 */
void
thread_nomigrate(void)
{
	cpu_t *cp;
	kthread_id_t t = curthread;

again:
	kpreempt_disable();
	cp = CPU;

	/*
	 * A highlevel interrupt must not modify t_nomigrate or
	 * t_weakbound_cpu of the thread it has interrupted.  A lowlevel
	 * interrupt thread cannot migrate and we can avoid the
	 * thread_lock call below by short-circuiting here.  In either
	 * case we can just return since no migration is possible and
	 * the condition will persist (ie, when we test for these again
	 * in thread_allowmigrate they can't have changed).   Migration
	 * is also impossible if we're at or above DISP_LEVEL pil.
	 */
	if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD ||
	    getpil() >= DISP_LEVEL) {
		kpreempt_enable();
		return;
	}

	/*
	 * We must be consistent with existing weak bindings.  Since we
	 * may be interrupted between the increment of t_nomigrate and
	 * the store to t_weakbound_cpu below we cannot assume that
	 * t_weakbound_cpu will be set if t_nomigrate is.  Note that we
	 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not
	 * always the case.
	 */
	if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) {
		if (!panicstr)
			panic("thread_nomigrate: binding to %p but already "
			    "bound to %p", (void *)cp,
			    (void *)t->t_weakbound_cpu);
	}

	/*
	 * At this point we have preemption disabled and we don't yet hold
	 * the thread lock.  So it's possible that somebody else could
	 * set t_bind_cpu here and not be able to force us across to the
	 * new cpu (since we have preemption disabled).
	 */
	thread_lock(curthread);

	/*
	 * If further weak bindings are being (temporarily) suppressed then
	 * we'll settle for disabling kernel preemption (which assures
	 * no migration provided the thread does not block which it is
	 * not allowed to if using thread_nomigrate).  We must remember
	 * this disposition so we can take appropriate action in
	 * thread_allowmigrate.  If this is a nested call and the
	 * thread is already weakbound then fall through as normal.
	 * We remember the decision to settle for kpreempt_disable through
	 * negative nesting counting in t_nomigrate.  Once a thread has had one
	 * weakbinding request satisfied in this way any further (nested)
	 * requests will continue to be satisfied in the same way,
	 * even if weak bindings have recommenced.
	 */
	if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) {
		--t->t_nomigrate;
		thread_unlock(curthread);
		return;		/* with kpreempt_disable still active */
	}

	/*
	 * We hold thread_lock so t_bind_cpu cannot change.  We could,
	 * however, be running on a different cpu to which we are t_bound_cpu
	 * to (as explained above).  If we grant the weak binding request
	 * in that case then the dispatcher must favour our weak binding
	 * over our strong (in which case, just as when preemption is
	 * disabled, we can continue to run on a cpu other than the one to
	 * which we are strongbound; the difference in this case is that
	 * this thread can be preempted and so can appear on the dispatch
	 * queues of a cpu other than the one it is strongbound to).
	 *
	 * If the cpu we are running on does not appear to be a current
	 * offline target (we check cpu_inmotion to determine this - since
	 * we don't hold cpu_lock we may not see a recent store to that,
	 * so it's possible that we at times can grant a weak binding to a
	 * cpu that is an offline target, but that one request will not
	 * prevent the offline from succeeding) then we will always grant
	 * the weak binding request.  This includes the case above where
	 * we grant a weakbinding not commensurate with our strong binding.
	 *
	 * If our cpu does appear to be an offline target then we're inclined
	 * not to grant the weakbinding request just yet - we'd prefer to
	 * migrate to another cpu and grant the request there.  The
	 * exceptions are those cases where going through preemption code
	 * will not result in us changing cpu:
	 *
	 * . interrupts have already bypassed this case (see above)
	 * . we are already weakbound to this cpu (dispatcher code will
	 *   always return us to the weakbound cpu)
	 * . preemption was disabled even before we disabled it above
	 * . we are strongbound to this cpu (if we're strongbound to
	 *   another and not yet running there the trip through the
	 *   dispatcher will move us to the strongbound cpu and we
	 *   will grant the weak binding there)
	 */
	if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 ||
	    t->t_bound_cpu == cp) {
		/*
		 * Don't be tempted to store to t_weakbound_cpu only on
		 * the first nested bind request - if we're interrupted
		 * after the increment of t_nomigrate and before the
		 * store to t_weakbound_cpu and the interrupt calls
		 * thread_nomigrate then the assertion in thread_allowmigrate
		 * would fail.
		 */
		t->t_nomigrate++;
		t->t_weakbound_cpu = cp;
		membar_producer();
		thread_unlock(curthread);
		/*
		 * Now that we have dropped the thread_lock another thread
		 * can set our t_weakbound_cpu, and will try to migrate us
		 * to the strongbound cpu (which will not be prevented by
		 * preemption being disabled since we're about to enable
		 * preemption).  We have granted the weakbinding to the current
		 * cpu, so again we are in the position that it is possible
		 * that our weak and strong bindings differ.  Again this
		 * is catered for by dispatcher code which will favour our
		 * weak binding.
		 */
		kpreempt_enable();
	} else {
		/*
		 * Move to another cpu before granting the request by
		 * forcing this thread through preemption code.  When we
		 * get to set{front,back}dq called from CL_PREEMPT()
		 * cpu_choose() will be used to select a cpu to queue
		 * us on - that will see cpu_inmotion and take
		 * steps to avoid returning us to this cpu.
		 */
		cp->cpu_kprunrun = 1;
		thread_unlock(curthread);
		kpreempt_enable();	/* will call preempt() */
		goto again;
	}
}

/*
 * Undo one level of weak binding established by thread_nomigrate().
 * When the outermost level is dropped the thread becomes migratable
 * again (or, for a binding that was satisfied by disabling preemption,
 * one level of kernel preemption nesting is re-enabled).
 */
void
thread_allowmigrate(void)
{
	kthread_id_t t = curthread;

	ASSERT(t->t_weakbound_cpu == CPU ||
	    (t->t_nomigrate < 0 && t->t_preempt > 0) ||
	    CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD ||
	    getpil() >= DISP_LEVEL);

	/*
	 * Mirror the short-circuit in thread_nomigrate(): interrupt
	 * context and high pil never took a weak binding, so there is
	 * nothing to undo.
	 */
	if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) ||
	    getpil() >= DISP_LEVEL)
		return;

	if (t->t_nomigrate < 0) {
		/*
		 * This thread was granted "weak binding" in the
		 * stronger form of kernel preemption disabling.
		 * Undo a level of nesting for both t_nomigrate
		 * and t_preempt.
		 */
		++t->t_nomigrate;
		kpreempt_enable();
	} else if (--t->t_nomigrate == 0) {
		/*
		 * Time to drop the weak binding.  We need to cater
		 * for the case where we're weakbound to a different
		 * cpu than that to which we're strongbound (a very
		 * temporary arrangement that must only persist until
		 * weak binding drops).  We don't acquire thread_lock
		 * here so even as this code executes t_bound_cpu
		 * may be changing.  So we disable preemption and
		 * a) in the case that t_bound_cpu changes while we
		 * have preemption disabled kprunrun will be set
		 * asynchronously, and b) if before disabling
		 * preemption we were already on a different cpu to
		 * our t_bound_cpu then we set kprunrun ourselves
		 * to force a trip through the dispatcher when
		 * preemption is enabled.
		 */
		kpreempt_disable();
		if (t->t_bound_cpu &&
		    t->t_weakbound_cpu != t->t_bound_cpu)
			CPU->cpu_kprunrun = 1;
		t->t_weakbound_cpu = NULL;
		membar_producer();
		kpreempt_enable();
	}
}

/*
 * weakbinding_stop can be used to temporarily cause weakbindings made
 * with thread_nomigrate to be satisfied through the stronger action of
 * kpreempt_disable.  weakbinding_start recommences normal weakbinding.
 */

void
weakbinding_stop(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	weakbindingbarrier = 1;
	membar_producer();	/* make visible before subsequent thread_lock */
}

void
weakbinding_start(void)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	weakbindingbarrier = 0;
}

/*
 * Empty cross-call handler; used as a no-op target for cross-CPU calls.
 */
void
null_xcall(void)
{
}

/*
 * This routine is called to place the CPUs in a safe place so that
 * one of them can be taken off line or placed on line.  What we are
 * trying to do here is prevent a thread from traversing the list
 * of active CPUs while we are changing it or from getting placed on
 * the run queue of a CPU that has just gone off line.  We do this by
 * creating a thread with the highest possible prio for each CPU and
 * having it call this routine.
 * The advantage of this method is that
 * we can eliminate all checks for CPU_ACTIVE in the disp routines.
 * This makes disp faster at the expense of making p_online() slower
 * which is a good trade off.
 */
static void
cpu_pause(int index)
{
	int s;
	struct _cpu_pause_info *cpi = &cpu_pause_info;
	volatile char *safe = &safe_list[index];	/* this CPU's state slot */
	long lindex = index;

	ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE));

	while (*safe != PAUSE_DIE) {
		*safe = PAUSE_READY;
		membar_enter();		/* make sure stores are flushed */
		sema_v(&cpi->cp_sem);	/* signal requesting thread */

		/*
		 * Wait here until all pause threads are running.  That
		 * indicates that it's safe to do the spl.  Until
		 * cpu_pause_info.cp_go is set, we don't want to spl
		 * because that might block clock interrupts needed
		 * to preempt threads on other CPUs.
		 */
		while (cpi->cp_go == 0)
			;
		/*
		 * Even though we are at the highest disp prio, we need
		 * to block out all interrupts below LOCK_LEVEL so that
		 * an intr doesn't come in, wake up a thread, and call
		 * setbackdq/setfrontdq.
		 */
		s = splhigh();
		/*
		 * if cpu_pause_func() has been set then call it using
		 * index as the argument, currently only used by
		 * cpr_suspend_cpus().  This function is used as the
		 * code to execute on the "paused" cpu's when a machine
		 * comes out of a sleep state and CPU's were powered off.
		 * (could also be used for hotplugging CPU's).
		 */
		if (cpu_pause_func != NULL)
			(*cpu_pause_func)((void *)lindex);

		/* Spin here (architecture-specific) until released. */
		mach_cpu_pause(safe);

		splx(s);
		/*
		 * Waiting is at an end. Switch out of cpu_pause
		 * loop and resume useful work.
		 */
		swtch();
	}

	/* PAUSE_DIE seen: announce our death to cpu_pause_free(). */
	mutex_enter(&pause_free_mutex);
	*safe = PAUSE_DEAD;
	cv_broadcast(&pause_free_cv);
	mutex_exit(&pause_free_mutex);
}

/*
 * Allow the cpus to start running again.
 */
void
start_cpus()
{
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_pause_info.cp_paused);
	cpu_pause_info.cp_paused = NULL;
	/* Releasing the safe_list slots lets mach_cpu_pause() spinners out. */
	for (i = 0; i < NCPU; i++)
		safe_list[i] = PAUSE_IDLE;
	membar_enter();			/* make sure stores are flushed */
	affinity_clear();		/* undo affinity_set() from pause_cpus() */
	splx(cpu_pause_info.cp_spl);
	kpreempt_enable();
}

/*
 * Allocate a pause thread for a CPU.
 */
static void
cpu_pause_alloc(cpu_t *cp)
{
	kthread_id_t	t;
	long		cpun = cp->cpu_id;

	/*
	 * Note, v.v_nglobpris will not change value as long as I hold
	 * cpu_lock.
	 */
	t = thread_create(NULL, 0, cpu_pause, (void *)cpun,
	    0, &p0, TS_STOPPED, v.v_nglobpris - 1);
	thread_lock(t);
	t->t_bound_cpu = cp;
	t->t_disp_queue = cp->cpu_disp;
	t->t_affinitycnt = 1;
	t->t_preempt = 1;
	thread_unlock(t);
	cp->cpu_pause_thread = t;
	/*
	 * Registering a thread in the callback table is usually done
	 * in the initialization code of the thread.  In this
	 * case, we do it right after thread creation because the
	 * thread itself may never run, and we need to register the
	 * fact that it is safe for cpr suspend.
	 */
	CALLB_CPR_INIT_SAFE(t, "cpu_pause");
}

/*
 * Free a pause thread for a CPU.
 */
static void
cpu_pause_free(cpu_t *cp)
{
	kthread_id_t	t;
	int		cpun = cp->cpu_id;

	ASSERT(MUTEX_HELD(&cpu_lock));
	/*
	 * We have to get the thread and tell it to die.
	 */
	if ((t = cp->cpu_pause_thread) == NULL) {
		ASSERT(safe_list[cpun] == PAUSE_IDLE);
		return;
	}
	thread_lock(t);
	t->t_cpu = CPU;		/* disp gets upset if last cpu is quiesced. */
	t->t_bound_cpu = NULL;	/* Must un-bind; cpu may not be running. */
	t->t_pri = v.v_nglobpris - 1;
	ASSERT(safe_list[cpun] == PAUSE_IDLE);
	safe_list[cpun] = PAUSE_DIE;
	THREAD_TRANSITION(t);
	setbackdq(t);
	thread_unlock_nopreempt(t);

	/*
	 * If we don't wait for the thread to actually die, it may try to
	 * run on the wrong cpu as part of an actual call to pause_cpus().
	 */
	mutex_enter(&pause_free_mutex);
	while (safe_list[cpun] != PAUSE_DEAD) {
		cv_wait(&pause_free_cv, &pause_free_mutex);
	}
	mutex_exit(&pause_free_mutex);
	safe_list[cpun] = PAUSE_IDLE;

	cp->cpu_pause_thread = NULL;
}

/*
 * Initialize basic structures for pausing CPUs.
 */
void
cpu_pause_init()
{
	sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL);
	/*
	 * Create initial CPU pause thread.
	 */
	cpu_pause_alloc(CPU);
}

/*
 * Start the threads used to pause another CPU.
 *
 * Returns the number of pause threads actually dispatched; CPUs that
 * are unavailable, quiesced, not yet ready, or the caller's own CPU
 * (cpu_id) are marked PAUSE_WAIT directly and not counted.
 */
static int
cpu_pause_start(processorid_t cpu_id)
{
	int	i;
	int	cpu_count = 0;

	for (i = 0; i < NCPU; i++) {
		cpu_t		*cp;
		kthread_id_t	t;

		cp = cpu[i];
		if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) {
			safe_list[i] = PAUSE_WAIT;
			continue;
		}

		/*
		 * Skip CPU if it is quiesced or not yet started.
		 */
		if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) != CPU_READY) {
			safe_list[i] = PAUSE_WAIT;
			continue;
		}

		/*
		 * Start this CPU's pause thread.
		 */
		t = cp->cpu_pause_thread;
		thread_lock(t);
		/*
		 * Reset the priority, since nglobpris may have
		 * changed since the thread was created, if someone
		 * has loaded the RT (or some other) scheduling
		 * class.
		 */
		t->t_pri = v.v_nglobpris - 1;
		THREAD_TRANSITION(t);
		setbackdq(t);
		thread_unlock_nopreempt(t);
		++cpu_count;
	}
	return (cpu_count);
}


/*
 * Pause all of the CPUs except the one we are on by creating a high
 * priority thread bound to those CPUs.
 *
 * Note that one must be extremely careful regarding code
 * executed while CPUs are paused.  Since a CPU may be paused
 * while a thread scheduling on that CPU is holding an adaptive
 * lock, code executed with CPUs paused must not acquire adaptive
 * (or low-level spin) locks.  Also, such code must not block,
 * since the thread that is supposed to initiate the wakeup may
 * never run.
 *
 * With a few exceptions, the restrictions on code executed with CPUs
 * paused match those for code executed at high-level interrupt
 * context.
 */
void
pause_cpus(cpu_t *off_cp)
{
	processorid_t	cpu_id;
	int		i;
	struct _cpu_pause_info	*cpi = &cpu_pause_info;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpi->cp_paused == NULL);
	cpi->cp_count = 0;
	cpi->cp_go = 0;
	for (i = 0; i < NCPU; i++)
		safe_list[i] = PAUSE_IDLE;
	kpreempt_disable();

	/*
	 * If running on the cpu that is going offline, get off it.
	 * This is so that it won't be necessary to rechoose a CPU
	 * when done.
	 */
	if (CPU == off_cp)
		cpu_id = off_cp->cpu_next_part->cpu_id;
	else
		cpu_id = CPU->cpu_id;
	affinity_set(cpu_id);

	/*
	 * Start the pause threads and record how many were started
	 */
	cpi->cp_count = cpu_pause_start(cpu_id);

	/*
	 * Now wait for all CPUs to be running the pause thread.
	 */
	while (cpi->cp_count > 0) {
		/*
		 * Spin reading the count without grabbing the disp
		 * lock to make sure we don't prevent the pause
		 * threads from getting the lock.
		 */
		while (sema_held(&cpi->cp_sem))
			;
		if (sema_tryp(&cpi->cp_sem))
			--cpi->cp_count;
	}
	cpi->cp_go = 1;			/* all have reached cpu_pause */

	/*
	 * Now wait for all CPUs to spl. (Transition from PAUSE_READY
	 * to PAUSE_WAIT.)
	 */
	for (i = 0; i < NCPU; i++) {
		while (safe_list[i] != PAUSE_WAIT)
			;
	}
	cpi->cp_spl = splhigh();	/* block dispatcher on this CPU */
	cpi->cp_paused = curthread;
}

/*
 * Check whether the current thread has CPUs paused
 */
int
cpus_paused(void)
{
	if (cpu_pause_info.cp_paused != NULL) {
		ASSERT(cpu_pause_info.cp_paused == curthread);
		return (1);
	}
	return (0);
}

/*
 * Like cpu_get() below, but without the zone-visibility check: returns
 * the cpu_t for any valid, available processor id, or NULL.
 */
static cpu_t *
cpu_get_all(processorid_t cpun)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun))
		return (NULL);
	return (cpu[cpun]);
}

/*
 * Check whether cpun is a valid processor id and whether it should be
 * visible from the current zone. If it is, return a pointer to the
 * associated CPU structure.
 */
cpu_t *
cpu_get(processorid_t cpun)
{
	cpu_t *c;

	ASSERT(MUTEX_HELD(&cpu_lock));
	c = cpu_get_all(cpun);
	/*
	 * A non-global zone bound to a pool pset only sees CPUs in
	 * that pset.
	 */
	if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
	    zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c))
		return (NULL);
	return (c);
}

/*
 * The following functions should be used to check CPU states in the kernel.
 * They should be invoked with cpu_lock held.  Kernel subsystems interested
 * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc
 * states.  Those are for user-land (and system call) use only.
 */

/*
 * Determine whether the CPU is online and handling interrupts.
 */
int
cpu_is_online(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_online(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is offline (this includes spare and faulted).
 */
int
cpu_is_offline(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_offline(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is powered off.
 */
int
cpu_is_poweredoff(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_poweredoff(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is handling interrupts.
 */
int
cpu_is_nointr(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_nointr(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is active (scheduling threads).
 */
int
cpu_is_active(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_active(cpu->cpu_flags));
}

/*
 * Same as above, but these require cpu_flags instead of cpu_t pointers.
 */

/* Online: active (scheduling threads) and taking interrupts. */
int
cpu_flagged_online(cpu_flag_t cpu_flags)
{
	return (cpu_flagged_active(cpu_flags) &&
	    (cpu_flags & CPU_ENABLE));
}

/* Offline: powered on but either not READY or explicitly OFFLINE. */
int
cpu_flagged_offline(cpu_flag_t cpu_flags)
{
	return (((cpu_flags & CPU_POWEROFF) == 0) &&
	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY));
}

int
cpu_flagged_poweredoff(cpu_flag_t cpu_flags)
{
	return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF);
}

/* Active but with interrupts disabled (P_NOINTR). */
int
cpu_flagged_nointr(cpu_flag_t cpu_flags)
{
	return (cpu_flagged_active(cpu_flags) &&
	    (cpu_flags & CPU_ENABLE) == 0);
}

/*
 * Active: not powered off, faulted, or spare, and READY without being
 * marked OFFLINE.
 */
int
cpu_flagged_active(cpu_flag_t cpu_flags)
{
	return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) &&
	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY));
}

/*
 * Bring the indicated CPU online.
 *
 * Returns 0 on success or the error from the mp_cpu_start() arch hook.
 */
int
cpu_online(cpu_t *cp)
{
	int	error = 0;

	/*
	 * Handle on-line request.
	 *	This code must put the new CPU on the active list before
	 *	starting it because it will not be paused, and will start
	 *	using the active list immediately.  The real start occurs
	 *	when the CPU_QUIESCED flag is turned off.
	 */

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Put all the cpus into a known safe place.
	 * No mutexes can be entered while CPUs are paused.
	 */
	error = mp_cpu_start(cp);	/* arch-dep hook */
	if (error == 0) {
		pg_cpupart_in(cp, cp->cpu_part);
		pause_cpus(NULL);
		cpu_add_active_internal(cp);
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		}
		cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN |
		    CPU_SPARE);
		start_cpus();
		cpu_stats_kstat_create(cp);
		cpu_create_intrstat(cp);
		lgrp_kstat_create(cp);
		cpu_state_change_notify(cp->cpu_id, CPU_ON);
		cpu_intr_enable(cp);	/* arch-dep hook */
		cpu_set_state(cp);
		cyclic_online(cp);
		/*
		 * This has to be called only after cyclic_online(). This
		 * function uses cyclics.
		 */
		callout_cpu_online(cp);
		poke_cpu(cp->cpu_id);
	}

	return (error);
}

/*
 * Take the indicated CPU offline.
 *
 * flags may include CPU_FORCED, which also unbinds hard-bound threads.
 * Returns 0 on success, EBUSY if the CPU cannot be quiesced (last
 * online CPU, bound threads or cyclics remain, etc.), or an error from
 * the unbind/arch hooks.
 */
int
cpu_offline(cpu_t *cp, int flags)
{
	cpupart_t *pp;
	int	error = 0;
	cpu_t	*ncp;
	int	intr_enable;
	int	cyclic_off = 0;
	int	callout_off = 0;
	int	loop_count;
	int	no_quiesce = 0;
	int	(*bound_func)(struct cpu *, int);
	kthread_t *t;
	lpl_t	*cpu_lpl;
	proc_t	*p;
	int	lgrp_diff_lpl;
	boolean_t unbind_all_threads = (flags & CPU_FORCED) != 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * If we're going from faulted or spare to offline, just
	 * clear these flags and update CPU state.
	 */
	if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) {
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		}
		cp->cpu_flags &= ~CPU_SPARE;
		cpu_set_state(cp);
		return (0);
	}

	/*
	 * Handle off-line request.
	 */
	pp = cp->cpu_part;
	/*
	 * Don't offline last online CPU in partition.
	 * NOTE(review): the cpu_intr_count(cp) < 2 check presumably also
	 * keeps at least one interrupt-enabled CPU in the system -- confirm.
	 */
	if (ncpus_online <= 1 || pp->cp_ncpus <= 1 || cpu_intr_count(cp) < 2)
		return (EBUSY);
	/*
	 * Unbind all soft-bound threads bound to our CPU and hard bound threads
	 * if we were asked to.
	 */
	error = cpu_unbind(cp->cpu_id, unbind_all_threads);
	if (error != 0)
		return (error);
	/*
	 * We shouldn't be bound to this CPU ourselves.
	 */
	if (curthread->t_bound_cpu == cp)
		return (EBUSY);

	/*
	 * Tell interested parties that this CPU is going offline.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_OFF);

	/*
	 * Tell the PG subsystem that the CPU is leaving the partition
	 */
	pg_cpupart_out(cp, pp);

	/*
	 * Take the CPU out of interrupt participation so we won't find
	 * bound kernel threads.  If the architecture cannot completely
	 * shut off interrupts on the CPU, don't quiesce it, but don't
	 * run anything but interrupt thread... this is indicated by
	 * the CPU_OFFLINE flag being on but the CPU_QUIESCE flag being
	 * off.
	 */
	intr_enable = cp->cpu_flags & CPU_ENABLE;
	if (intr_enable)
		no_quiesce = cpu_intr_disable(cp);

	/*
	 * Record that we are aiming to offline this cpu.  This acts as
	 * a barrier to further weak binding requests in thread_nomigrate
	 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to
	 * lean away from this cpu.  Further strong bindings are already
	 * avoided since we hold cpu_lock.  Since threads that are set
	 * runnable around now and others coming off the target cpu are
	 * directed away from the target, existing strong and weak bindings
	 * (especially the latter) to the target cpu stand maximum chance of
	 * being able to unbind during the short delay loop below (if other
	 * unbound threads compete they may not see cpu in time to unbind
	 * even if they would do so immediately).
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Check for kernel threads (strong or weak) bound to that CPU.
	 * Strongly bound threads may not unbind, and we'll have to return
	 * EBUSY.  Weakly bound threads should always disappear - we've
	 * stopped more weak binding with cpu_inmotion and existing
	 * bindings will drain imminently (they may not block).  Nonetheless
	 * we will wait for a fixed period for all bound threads to disappear.
	 * Inactive interrupt threads are OK (they'll be in TS_FREE
	 * state).  If test finds some bound threads, wait a few ticks
	 * to give short-lived threads (such as interrupts) chance to
	 * complete.  Note that if no_quiesce is set, i.e. this cpu
	 * is required to service interrupts, then we take the route
	 * that permits interrupt threads to be active (or bypassed).
	 */
	bound_func = no_quiesce ? disp_bound_threads : disp_bound_anythreads;

again:	for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) {
		if (loop_count >= 5) {
			error = EBUSY;	/* some threads still bound */
			break;
		}

		/*
		 * If some threads were assigned, give them
		 * a chance to complete or move.
		 *
		 * This assumes that the clock_thread is not bound
		 * to any CPU, because the clock_thread is needed to
		 * do the delay(hz/100).
		 *
		 * Note: we still hold the cpu_lock while waiting for
		 * the next clock tick.  This is OK since it isn't
		 * needed for anything else except processor_bind(2),
		 * and system initialization.  If we drop the lock,
		 * we would risk another p_online disabling the last
		 * processor.
		 */
		delay(hz/100);
	}

	if (error == 0 && callout_off == 0) {
		callout_cpu_offline(cp);
		callout_off = 1;
	}

	if (error == 0 && cyclic_off == 0) {
		if (!cyclic_offline(cp)) {
			/*
			 * We must have bound cyclics...
			 */
			error = EBUSY;
			goto out;
		}
		cyclic_off = 1;
	}

	/*
	 * Call mp_cpu_stop() to perform any special operations
	 * needed for this machine architecture to offline a CPU.
	 */
	if (error == 0)
		error = mp_cpu_stop(cp);	/* arch-dep hook */

	/*
	 * If that all worked, take the CPU offline and decrement
	 * ncpus_online.
	 */
	if (error == 0) {
		/*
		 * Put all the cpus into a known safe place.
		 * No mutexes can be entered while CPUs are paused.
		 */
		pause_cpus(cp);
		/*
		 * Repeat the operation, if necessary, to make sure that
		 * all outstanding low-level interrupts run to completion
		 * before we set the CPU_QUIESCED flag.  It's also possible
		 * that a thread has weak bound to the cpu despite our raising
		 * cpu_inmotion above since it may have loaded that
		 * value before the barrier became visible (this would have
		 * to be the thread that was on the target cpu at the time
		 * we raised the barrier).
		 */
		if ((!no_quiesce && cp->cpu_intr_actv != 0) ||
		    (*bound_func)(cp, 1)) {
			start_cpus();
			(void) mp_cpu_start(cp);
			goto again;
		}
		ncp = cp->cpu_next_part;
		cpu_lpl = cp->cpu_lpl;
		ASSERT(cpu_lpl != NULL);

		/*
		 * Remove the CPU from the list of active CPUs.
		 */
		cpu_remove_active(cp);

		/*
		 * Walk the active process list and look for threads
		 * whose home lgroup needs to be updated, or
		 * the last CPU they run on is the one being offlined now.
		 */

		ASSERT(curthread->t_cpu != cp);
		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {
				ASSERT(t->t_lpl != NULL);
				/*
				 * Taking last CPU in lpl offline
				 * Rehome thread if it is in this lpl
				 * Otherwise, update the count of how many
				 * threads are in this CPU's lgroup but have
				 * a different lpl.
				 */

				if (cpu_lpl->lpl_ncpu == 0) {
					if (t->t_lpl == cpu_lpl)
						lgrp_move_thread(t,
						    lgrp_choose(t,
						    t->t_cpupart), 0);
					else if (t->t_lpl->lpl_lgrpid ==
					    cpu_lpl->lpl_lgrpid)
						lgrp_diff_lpl++;
				}
				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/*
				 * Update CPU last ran on if it was this CPU
				 */
				if (t->t_cpu == cp && t->t_bound_cpu != cp)
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
				    t->t_weakbound_cpu == cp);

				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * Didn't find any threads in the same lgroup as this
			 * CPU with a different lpl, so remove the lgroup from
			 * the process lgroup bitmask.
			 */

			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, cpu_lpl->lpl_lgrpid);
		}

		/*
		 * Walk thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */

		t = curthread;
		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * Rehome threads with same lpl as this CPU when this
			 * is the last CPU in the lpl.
			 */

			if ((cpu_lpl->lpl_ncpu == 0) && (t->t_lpl == cpu_lpl))
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/*
			 * Update CPU last ran on if it was this CPU
			 */

			if (t->t_cpu == cp && t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp,
				    t->t_lpl, t->t_pri, NULL);
			}
			ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
			    t->t_weakbound_cpu == cp);
			t = t->t_next;

		} while (t != curthread);
		ASSERT((cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) == 0);
		cp->cpu_flags |= CPU_OFFLINE;
		disp_cpu_inactive(cp);
		if (!no_quiesce)
			cp->cpu_flags |= CPU_QUIESCED;
		ncpus_online--;
		cpu_set_state(cp);
		cpu_inmotion = NULL;
		start_cpus();
		cpu_stats_kstat_destroy(cp);
		cpu_delete_intrstat(cp);
		lgrp_kstat_destroy(cp);
	}

out:
	cpu_inmotion = NULL;

	/*
	 * If we failed, re-enable interrupts.
	 * Do this even if cpu_intr_disable returned an error, because
	 * it may have partially disabled interrupts.
	 */
	if (error && intr_enable)
		cpu_intr_enable(cp);

	/*
	 * If we failed, but managed to offline the cyclic subsystem on this
	 * CPU, bring it back online.
	 */
	if (error && cyclic_off)
		cyclic_online(cp);

	/*
	 * If we failed, but managed to offline callouts on this CPU,
	 * bring it back online.
	 */
	if (error && callout_off)
		callout_cpu_online(cp);

	/*
	 * If we failed, tell the PG subsystem that the CPU is back.
	 * NOTE(review): this call is unconditional here, unlike the
	 * notifications below -- verify the PG subsystem tolerates the
	 * success path (where the CPU really did leave the partition).
	 */
	pg_cpupart_in(cp, pp);

	/*
	 * If we failed, we need to notify everyone that this CPU is back on.
	 */
	if (error != 0)
		cpu_state_change_notify(cp->cpu_id, CPU_ON);

	return (error);
}

/*
 * Mark the indicated CPU as faulted, taking it offline.
 */
int
cpu_faulted(cpu_t *cp, int flags)
{
	int	error = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(!cpu_is_poweredoff(cp));

	/* Already offline: just flip SPARE -> FAULTED and update state. */
	if (cpu_is_offline(cp)) {
		cp->cpu_flags &= ~CPU_SPARE;
		cp->cpu_flags |= CPU_FAULTED;
		mp_cpu_faulted_enter(cp);
		cpu_set_state(cp);
		return (0);
	}

	/* Otherwise take it offline first; flags are passed through. */
	if ((error = cpu_offline(cp, flags)) == 0) {
		cp->cpu_flags |= CPU_FAULTED;
		mp_cpu_faulted_enter(cp);
		cpu_set_state(cp);
	}

	return (error);
}

/*
 * Mark the indicated CPU as a spare, taking it offline.
 */
int
cpu_spare(cpu_t *cp, int flags)
{
	int	error = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(!cpu_is_poweredoff(cp));

	/* Already offline: clear any FAULTED state and mark as spare. */
	if (cpu_is_offline(cp)) {
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		}
		cp->cpu_flags |= CPU_SPARE;
		cpu_set_state(cp);
		return (0);
	}

	if ((error = cpu_offline(cp, flags)) == 0) {
		cp->cpu_flags |= CPU_SPARE;
		cpu_set_state(cp);
	}

	return (error);
}

/*
 * Take the indicated CPU from poweroff to offline.
 * Returns 0 on success or the error from the mp_cpu_poweron() arch hook.
 */
int
cpu_poweron(cpu_t *cp)
{
	int	error = ENOTSUP;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_is_poweredoff(cp));

	error = mp_cpu_poweron(cp);	/* arch-dep hook */
	if (error == 0)
		cpu_set_state(cp);

	return (error);
}

/*
 * Take the indicated CPU from any inactive state to powered off.
 */
int
cpu_poweroff(cpu_t *cp)
{
	int	error = ENOTSUP;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_is_offline(cp));

	if (!(cp->cpu_flags & CPU_QUIESCED))
		return (EBUSY);		/* not completely idle */

	error = mp_cpu_poweroff(cp);	/* arch-dep hook */
	if (error == 0)
		cpu_set_state(cp);

	return (error);
}

/*
 * Initialize the Sequential CPU id lookup table
 *
 * Replaces the single-entry bootstrap table (cpu_seq pointing at
 * cpu_list) with a kmem-allocated array sized for max_ncpus; slot 0
 * is the current CPU.
 */
void
cpu_seq_tbl_init()
{
	cpu_t	**tbl;

	tbl = kmem_zalloc(sizeof (struct cpu *) * max_ncpus, KM_SLEEP);
	tbl[0] = CPU;

	cpu_seq = tbl;
}

/*
 * Initialize the CPU lists for the first CPU.
 *
 * Makes cp a singleton on the cpu, online, and partition lists, gives
 * it sequence id 0, and places it in the default partition.
 */
void
cpu_list_init(cpu_t *cp)
{
	cp->cpu_next = cp;
	cp->cpu_prev = cp;
	cpu_list = cp;
	clock_cpu_list = cp;

	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_active = cp;

	cp->cpu_seqid = 0;
	CPUSET_ADD(cpu_seqid_inuse, 0);

	/*
	 * Bootstrap cpu_seq using cpu_list
	 * The cpu_seq[] table will be dynamically allocated
	 * when kmem later becomes available (but before going MP)
	 */
	cpu_seq = &cpu_list;

	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cp_default.cp_cpulist = cp;
	cp_default.cp_ncpus = 1;
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;
	cp->cpu_part = &cp_default;

	CPUSET_ADD(cpu_available, cp->cpu_id);
}

/*
 * Insert a CPU into the list of available CPUs.
 */
void
cpu_add_unit(cpu_t *cp)
{
	int seqid;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)cp, 0);

	/*
	 * Note: most users of the cpu_list will grab the
	 * cpu_lock to insure that it isn't modified.  However,
	 * certain users can't or won't do that.
	 * To allow this we pause the other cpus.  Users who walk the list
	 * without cpu_lock, must disable kernel preemption
	 * to insure that the list isn't modified underneath
	 * them.  Also, any cached pointers to cpu structures
	 * must be revalidated by checking to see if the
	 * cpu_next pointer points to itself.  This check must
	 * be done with the cpu_lock held or kernel preemption
	 * disabled.  This check relies upon the fact that
	 * old cpu structures are not free'ed or cleared after
	 * they are removed from the cpu_list.
	 *
	 * Note that the clock code walks the cpu list dereferencing
	 * the cpu_part pointer, so we need to initialize it before
	 * adding the cpu to the list.
	 */
	cp->cpu_part = &cp_default;
	(void) pause_cpus(NULL);
	cp->cpu_next = cpu_list;
	cp->cpu_prev = cpu_list->cpu_prev;
	cpu_list->cpu_prev->cpu_next = cp;
	cpu_list->cpu_prev = cp;
	start_cpus();

	/* Assign the lowest free sequence id. */
	for (seqid = 0; CPU_IN_SET(cpu_seqid_inuse, seqid); seqid++)
		continue;
	CPUSET_ADD(cpu_seqid_inuse, seqid);
	cp->cpu_seqid = seqid;
	ASSERT(ncpus < max_ncpus);
	ncpus++;
	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cpu[cp->cpu_id] = cp;
	CPUSET_ADD(cpu_available, cp->cpu_id);
	cpu_seq[cp->cpu_seqid] = cp;

	/*
	 * allocate a pause thread for this CPU.
	 */
	cpu_pause_alloc(cp);

	/*
	 * So that new CPUs won't have NULL prev_onln and next_onln pointers,
	 * link them into a list of just that CPU.
	 * This is so that disp_lowpri_cpu will work for thread_create in
	 * pause_cpus() when called from the startup thread in a new CPU.
	 */
	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_info_kstat_create(cp);
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;

	init_cpu_mstate(cp, CMS_SYSTEM);

	pool_pset_mod = gethrtime();
}

/*
 * Do the opposite of cpu_add_unit().
 *
 * The CPU must already be a singleton on the online and partition
 * lists (asserted below); this removes it from the cpu list and frees
 * its per-CPU resources.  The cpu_t itself is deliberately kept
 * around (see the pause comment below).
 */
void
cpu_del_unit(int cpuid)
{
	struct cpu	*cp, *cpnext;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cp = cpu[cpuid];
	ASSERT(cp != NULL);

	ASSERT(cp->cpu_next_onln == cp);
	ASSERT(cp->cpu_prev_onln == cp);
	ASSERT(cp->cpu_next_part == cp);
	ASSERT(cp->cpu_prev_part == cp);

	/*
	 * Tear down the CPU's physical ID cache, and update any
	 * processor groups
	 */
	pg_cpu_fini(cp);
	pghw_physid_destroy(cp);

	/*
	 * Destroy kstat stuff.
	 */
	cpu_info_kstat_destroy(cp);
	term_cpu_mstate(cp);
	/*
	 * Free up pause thread.
	 */
	cpu_pause_free(cp);
	CPUSET_DEL(cpu_available, cp->cpu_id);
	cpu[cp->cpu_id] = NULL;
	cpu_seq[cp->cpu_seqid] = NULL;

	/*
	 * The clock thread and mutex_vector_enter cannot hold the
	 * cpu_lock while traversing the cpu list, therefore we pause
	 * all other threads by pausing the other cpus. These, and any
	 * other routines holding cpu pointers while possibly sleeping
	 * must be sure to call kpreempt_disable before processing the
	 * list and be sure to check that the cpu has not been deleted
	 * after any sleeps (check cp->cpu_next != NULL). We guarantee
	 * to keep the deleted cpu structure around.
	 *
	 * Note that this MUST be done AFTER cpu_available
	 * has been updated so that we don't waste time
	 * trying to pause the cpu we're trying to delete.
	 */
	(void) pause_cpus(NULL);

	cpnext = cp->cpu_next;
	cp->cpu_prev->cpu_next = cp->cpu_next;
	cp->cpu_next->cpu_prev = cp->cpu_prev;
	if (cp == cpu_list)
		cpu_list = cpnext;

	/*
	 * Signals that the cpu has been deleted (see above).
	 */
	cp->cpu_next = NULL;
	cp->cpu_prev = NULL;

	start_cpus();

	CPUSET_DEL(cpu_seqid_inuse, cp->cpu_seqid);
	ncpus--;
	lgrp_config(LGRP_CONFIG_CPU_DEL, (uintptr_t)cp, 0);

	pool_pset_mod = gethrtime();
}

/*
 * Add a CPU to the list of active CPUs.
 *	This routine must not get any locks, because other CPUs are paused.
 */
static void
cpu_add_active_internal(cpu_t *cp)
{
	cpupart_t	*pp = cp->cpu_part;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	ncpus_online++;
	cpu_set_state(cp);
	cp->cpu_next_onln = cpu_active;
	cp->cpu_prev_onln = cpu_active->cpu_prev_onln;
	cpu_active->cpu_prev_onln->cpu_next_onln = cp;
	cpu_active->cpu_prev_onln = cp;

	if (pp->cp_cpulist) {
		cp->cpu_next_part = pp->cp_cpulist;
		cp->cpu_prev_part = pp->cp_cpulist->cpu_prev_part;
		pp->cp_cpulist->cpu_prev_part->cpu_next_part = cp;
		pp->cp_cpulist->cpu_prev_part = cp;
	} else {
		ASSERT(pp->cp_ncpus == 0);
		pp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
	}
	pp->cp_ncpus++;
	if (pp->cp_ncpus == 1) {
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	}

	pg_cpu_active(cp);
	lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0);

	bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg));
}

/*
 * Add a CPU to the list of active CPUs.
 *	This is called from machine-dependent layers when a new CPU is started.
 */
void
cpu_add_active(cpu_t *cp)
{
	pg_cpupart_in(cp, cp->cpu_part);

	pause_cpus(NULL);
	cpu_add_active_internal(cp);
	start_cpus();

	cpu_stats_kstat_create(cp);
	cpu_create_intrstat(cp);
	lgrp_kstat_create(cp);
	cpu_state_change_notify(cp->cpu_id, CPU_INIT);
}


/*
 * Remove a CPU from the list of active CPUs.
 *	This routine must not get any locks, because other CPUs are paused.
 */
/* ARGSUSED */
static void
cpu_remove_active(cpu_t *cp)
{
	cpupart_t	*pp = cp->cpu_part;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cp->cpu_next_onln != cp);	/* not the last one */
	ASSERT(cp->cpu_prev_onln != cp);	/* not the last one */

	pg_cpu_inactive(cp);

	lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0);

	if (cp == clock_cpu_list)
		clock_cpu_list = cp->cpu_next_onln;

	cp->cpu_prev_onln->cpu_next_onln = cp->cpu_next_onln;
	cp->cpu_next_onln->cpu_prev_onln = cp->cpu_prev_onln;
	if (cpu_active == cp) {
		cpu_active = cp->cpu_next_onln;
	}
	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;

	cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
	cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
	if (pp->cp_cpulist == cp) {
		pp->cp_cpulist = cp->cpu_next_part;
		ASSERT(pp->cp_cpulist != cp);
	}
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;
	pp->cp_ncpus--;
	if (pp->cp_ncpus == 0) {
		cp_numparts_nonempty--;
		/* catches underflow of the nonempty-partition count */
		ASSERT(cp_numparts_nonempty != 0);
	}
}

/*
 * Routine used to setup a newly inserted CPU in preparation for starting
 * it running code.
 *
 * Returns 0 on success, EINVAL for a bad id, EALREADY if the CPU is
 * already configured, or an error from the arch hook / state hooks
 * (hooks are rolled back on failure).
 */
int
cpu_configure(int cpuid)
{
	int retval = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Some structures are statically allocated based upon
	 * the maximum number of cpus the system supports.  Do not
	 * try to add anything beyond this limit.
	 */
	if (cpuid < 0 || cpuid >= NCPU) {
		return (EINVAL);
	}

	if ((cpu[cpuid] != NULL) && (cpu[cpuid]->cpu_flags != 0)) {
		return (EALREADY);
	}

	if ((retval = mp_cpu_configure(cpuid)) != 0) {
		return (retval);
	}

	cpu[cpuid]->cpu_flags = CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF;
	cpu_set_state(cpu[cpuid]);
	retval = cpu_state_change_hooks(cpuid, CPU_CONFIG, CPU_UNCONFIG);
	if (retval != 0)
		(void) mp_cpu_unconfigure(cpuid);

	return (retval);
}

/*
 * Routine used to cleanup a CPU that has been powered off.  This will
 * destroy all per-cpu information related to this cpu.
 */
int
cpu_unconfigure(int cpuid)
{
	int error;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (cpu[cpuid] == NULL) {
		return (ENODEV);
	}

	if (cpu[cpuid]->cpu_flags == 0) {
		return (EALREADY);
	}

	/* CPU must be powered off before it can be torn down. */
	if ((cpu[cpuid]->cpu_flags & CPU_POWEROFF) == 0) {
		return (EBUSY);
	}

	if (cpu[cpuid]->cpu_props != NULL) {
		(void) nvlist_free(cpu[cpuid]->cpu_props);
		cpu[cpuid]->cpu_props = NULL;
	}

	error = cpu_state_change_hooks(cpuid, CPU_UNCONFIG, CPU_CONFIG);

	if (error != 0)
		return (error);

	return (mp_cpu_unconfigure(cpuid));
}

/*
 * Routines for registering and de-registering cpu_setup callback functions.
 *
 * Caller's context
 *	These routines must not be called from a driver's attach(9E) or
 *	detach(9E) entry point.
 *
 * NOTE: CPU callbacks should not block.  They are called with cpu_lock held.
 */

/*
 * Ideally, these would be dynamically allocated and put into a linked
 * list; however that is not feasible because the registration routine
 * has to be available before the kmem allocator is working (in fact,
 * it is called by the kmem allocator init code).  In any case, there
 * are quite a few extra entries for future users.
 */
#define	NCPU_SETUPS	20

struct cpu_setup {
	cpu_setup_func_t *func;
	void *arg;
} cpu_setups[NCPU_SETUPS];

void
register_cpu_setup_func(cpu_setup_func_t *func, void *arg)
{
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++)
		if (cpu_setups[i].func == NULL)
			break;
	if (i >= NCPU_SETUPS)
		cmn_err(CE_PANIC, "Ran out of cpu_setup callback entries");

	cpu_setups[i].func = func;
	cpu_setups[i].arg = arg;
}

void
unregister_cpu_setup_func(cpu_setup_func_t *func, void *arg)
{
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* Match on both function and argument. */
	for (i = 0; i < NCPU_SETUPS; i++)
		if ((cpu_setups[i].func == func) &&
		    (cpu_setups[i].arg == arg))
			break;
	if (i >= NCPU_SETUPS)
		cmn_err(CE_PANIC, "Could not find cpu_setup callback to "
		    "deregister");

	cpu_setups[i].func = NULL;
	cpu_setups[i].arg = 0;
}

/*
 * Call any state change hooks for this CPU, ignore any errors.
 */
void
cpu_state_change_notify(int id, cpu_setup_t what)
{
	int i;

	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++) {
		if (cpu_setups[i].func != NULL) {
			cpu_setups[i].func(what, id, cpu_setups[i].arg);
		}
	}
}

/*
 * Call any state change hooks for this CPU, undo it if error found.
 */
static int
cpu_state_change_hooks(int id, cpu_setup_t what, cpu_setup_t undo)
{
	int i;
	int retval = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++) {
		if (cpu_setups[i].func != NULL) {
			retval = cpu_setups[i].func(what, id,
			    cpu_setups[i].arg);
			if (retval) {
				/*
				 * A hook failed: call the previously
				 * successful hooks with the undo event,
				 * in reverse, and return the error.
				 */
				for (i--; i >= 0; i--) {
					if (cpu_setups[i].func != NULL)
						cpu_setups[i].func(undo,
						    id, cpu_setups[i].arg);
				}
				break;
			}
		}
	}
	return (retval);
}

/*
 * Export information about this CPU via the kstat mechanism.
 */
static struct {
	kstat_named_t ci_state;
	kstat_named_t ci_state_begin;
	kstat_named_t ci_cpu_type;
	kstat_named_t ci_fpu_type;
	kstat_named_t ci_clock_MHz;
	kstat_named_t ci_chip_id;
	kstat_named_t ci_implementation;
	kstat_named_t ci_brandstr;
	kstat_named_t ci_core_id;
	kstat_named_t ci_curr_clock_Hz;
	kstat_named_t ci_supp_freq_Hz;
#if defined(__sparcv9)
	kstat_named_t ci_device_ID;
	kstat_named_t ci_cpu_fru;
#endif
#if defined(__x86)
	kstat_named_t ci_vendorstr;
	kstat_named_t ci_family;
	kstat_named_t ci_model;
	kstat_named_t ci_step;
	kstat_named_t ci_clogid;
	kstat_named_t ci_pkg_core_id;
	kstat_named_t ci_ncpuperchip;
	kstat_named_t ci_ncoreperchip;
	kstat_named_t ci_max_cstates;
	kstat_named_t ci_curr_cstate;
	kstat_named_t ci_sktstr;
#endif
} cpu_info_template = {
	{ "state",			KSTAT_DATA_CHAR },
	{ "state_begin",		KSTAT_DATA_LONG },
	{ "cpu_type",			KSTAT_DATA_CHAR },
	{ "fpu_type",			KSTAT_DATA_CHAR },
	{ "clock_MHz",			KSTAT_DATA_LONG },
	{ "chip_id",			KSTAT_DATA_LONG },
	{ "implementation",		KSTAT_DATA_STRING },
	{ "brand",			KSTAT_DATA_STRING },
	{ "core_id",			KSTAT_DATA_LONG },
	{ "current_clock_Hz",		KSTAT_DATA_UINT64 },
	{ "supported_frequencies_Hz",	KSTAT_DATA_STRING },
#if defined(__sparcv9)
	{ "device_ID",			KSTAT_DATA_UINT64 },
	{ "cpu_fru",			KSTAT_DATA_STRING },
#endif
#if defined(__x86)
	{ "vendor_id",			KSTAT_DATA_STRING },
	{ "family",			KSTAT_DATA_INT32 },
	{ "model",			KSTAT_DATA_INT32 },
	{ "stepping",			KSTAT_DATA_INT32 },
	{ "clog_id",			KSTAT_DATA_INT32 },
	{ "pkg_core_id",		KSTAT_DATA_LONG },
	{ "ncpu_per_chip",		KSTAT_DATA_INT32 },
	{ "ncore_per_chip",		KSTAT_DATA_INT32 },
	{ "supported_max_cstates",	KSTAT_DATA_INT32 },
	{ "current_cstate",		KSTAT_DATA_INT32 },
	{ "socket_type",		KSTAT_DATA_STRING },
#endif
};

static kmutex_t cpu_info_template_lock;

/*
 * kstat update routine for the per-CPU "cpu_info" kstat.
 *
 * Fills the shared cpu_info_template from the cpu_t in ks_private.
 * NOTE(review): serialization of the shared template presumably relies
 * on cpu_info_template_lock being installed as the kstat's ks_lock by
 * cpu_info_kstat_create() -- not fully visible in this chunk; confirm.
 */
static int
cpu_info_kstat_update(kstat_t *ksp, int rw)
{
	cpu_t	*cp = ksp->ks_private;
	const char *pi_state;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	/* Translate the processor_info state into its string form. */
	switch (cp->cpu_type_info.pi_state) {
	case P_ONLINE:
		pi_state = PS_ONLINE;
		break;
	case P_POWEROFF:
		pi_state = PS_POWEROFF;
		break;
	case P_NOINTR:
		pi_state = PS_NOINTR;
		break;
	case P_FAULTED:
		pi_state = PS_FAULTED;
		break;
	case P_SPARE:
		pi_state = PS_SPARE;
		break;
	case P_OFFLINE:
		pi_state = PS_OFFLINE;
		break;
	default:
		pi_state = "unknown";
	}
	(void) strcpy(cpu_info_template.ci_state.value.c, pi_state);
	cpu_info_template.ci_state_begin.value.l = cp->cpu_state_begin;
	(void) strncpy(cpu_info_template.ci_cpu_type.value.c,
	    cp->cpu_type_info.pi_processor_type, 15);
	(void) strncpy(cpu_info_template.ci_fpu_type.value.c,
	    cp->cpu_type_info.pi_fputypes, 15);
	cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock;
	cpu_info_template.ci_chip_id.value.l =
	    pg_plat_hw_instance_id(cp, PGHW_CHIP);
	kstat_named_setstr(&cpu_info_template.ci_implementation,
	    cp->cpu_idstr);
	kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr);
	cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp);
	cpu_info_template.ci_curr_clock_Hz.value.ui64 =
	    cp->cpu_curr_clock;
	kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz,
	    cp->cpu_supp_freqs);
#if defined(__sparcv9)
	cpu_info_template.ci_device_ID.value.ui64 =
	    cpunodes[cp->cpu_id].device_id;
	kstat_named_setstr(&cpu_info_template.ci_cpu_fru, cpu_fru_fmri(cp));
#endif
#if defined(__x86)
	kstat_named_setstr(&cpu_info_template.ci_vendorstr,
	    cpuid_getvendorstr(cp));
	cpu_info_template.ci_family.value.l = cpuid_getfamily(cp);
	cpu_info_template.ci_model.value.l = cpuid_getmodel(cp);
	cpu_info_template.ci_step.value.l = cpuid_getstep(cp);
	cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp);
	cpu_info_template.ci_ncpuperchip.value.l = cpuid_get_ncpu_per_chip(cp);
	cpu_info_template.ci_ncoreperchip.value.l =
	    cpuid_get_ncore_per_chip(cp);
	cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp);
	cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates;
	cpu_info_template.ci_curr_cstate.value.l = cp->cpu_m.curr_cstate;
	kstat_named_setstr(&cpu_info_template.ci_sktstr,
	    cpuid_getsocketstr(cp));
#endif

	return (0);
}

static void
cpu_info_kstat_create(cpu_t *cp)
{
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	if ((cp->cpu_info_kstat = kstat_create_zone("cpu_info", cp->cpu_id,
	    NULL, "misc", KSTAT_TYPE_NAMED,
	    sizeof (cpu_info_template) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL, zoneid)) != NULL) {
		cp->cpu_info_kstat->ks_data_size += 2 * CPU_IDSTRLEN;
#if defined(__sparcv9)
		cp->cpu_info_kstat->ks_data_size +=
		    strlen(cpu_fru_fmri(cp)) + 1;
#endif
#if defined(__x86)
		cp->cpu_info_kstat->ks_data_size += X86_VENDOR_STRLEN;
#endif 2300 if (cp->cpu_supp_freqs != NULL) 2301 cp->cpu_info_kstat->ks_data_size += 2302 strlen(cp->cpu_supp_freqs) + 1; 2303 cp->cpu_info_kstat->ks_lock = &cpu_info_template_lock; 2304 cp->cpu_info_kstat->ks_data = &cpu_info_template; 2305 cp->cpu_info_kstat->ks_private = cp; 2306 cp->cpu_info_kstat->ks_update = cpu_info_kstat_update; 2307 kstat_install(cp->cpu_info_kstat); 2308 } 2309 } 2310 2311 static void 2312 cpu_info_kstat_destroy(cpu_t *cp) 2313 { 2314 ASSERT(MUTEX_HELD(&cpu_lock)); 2315 2316 kstat_delete(cp->cpu_info_kstat); 2317 cp->cpu_info_kstat = NULL; 2318 } 2319 2320 /* 2321 * Create and install kstats for the boot CPU. 2322 */ 2323 void 2324 cpu_kstat_init(cpu_t *cp) 2325 { 2326 mutex_enter(&cpu_lock); 2327 cpu_info_kstat_create(cp); 2328 cpu_stats_kstat_create(cp); 2329 cpu_create_intrstat(cp); 2330 cpu_set_state(cp); 2331 mutex_exit(&cpu_lock); 2332 } 2333 2334 /* 2335 * Make visible to the zone that subset of the cpu information that would be 2336 * initialized when a cpu is configured (but still offline). 2337 */ 2338 void 2339 cpu_visibility_configure(cpu_t *cp, zone_t *zone) 2340 { 2341 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2342 2343 ASSERT(MUTEX_HELD(&cpu_lock)); 2344 ASSERT(pool_pset_enabled()); 2345 ASSERT(cp != NULL); 2346 2347 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2348 zone->zone_ncpus++; 2349 ASSERT(zone->zone_ncpus <= ncpus); 2350 } 2351 if (cp->cpu_info_kstat != NULL) 2352 kstat_zone_add(cp->cpu_info_kstat, zoneid); 2353 } 2354 2355 /* 2356 * Make visible to the zone that subset of the cpu information that would be 2357 * initialized when a previously configured cpu is onlined. 2358 */ 2359 void 2360 cpu_visibility_online(cpu_t *cp, zone_t *zone) 2361 { 2362 kstat_t *ksp; 2363 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */ 2364 zoneid_t zoneid = zone ? 
zone->zone_id : ALL_ZONES; 2365 processorid_t cpun; 2366 2367 ASSERT(MUTEX_HELD(&cpu_lock)); 2368 ASSERT(pool_pset_enabled()); 2369 ASSERT(cp != NULL); 2370 ASSERT(cpu_is_active(cp)); 2371 2372 cpun = cp->cpu_id; 2373 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2374 zone->zone_ncpus_online++; 2375 ASSERT(zone->zone_ncpus_online <= ncpus_online); 2376 } 2377 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun); 2378 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES)) 2379 != NULL) { 2380 kstat_zone_add(ksp, zoneid); 2381 kstat_rele(ksp); 2382 } 2383 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) { 2384 kstat_zone_add(ksp, zoneid); 2385 kstat_rele(ksp); 2386 } 2387 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) { 2388 kstat_zone_add(ksp, zoneid); 2389 kstat_rele(ksp); 2390 } 2391 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) != 2392 NULL) { 2393 kstat_zone_add(ksp, zoneid); 2394 kstat_rele(ksp); 2395 } 2396 } 2397 2398 /* 2399 * Update relevant kstats such that cpu is now visible to processes 2400 * executing in specified zone. 2401 */ 2402 void 2403 cpu_visibility_add(cpu_t *cp, zone_t *zone) 2404 { 2405 cpu_visibility_configure(cp, zone); 2406 if (cpu_is_active(cp)) 2407 cpu_visibility_online(cp, zone); 2408 } 2409 2410 /* 2411 * Make invisible to the zone that subset of the cpu information that would be 2412 * torn down when a previously offlined cpu is unconfigured. 2413 */ 2414 void 2415 cpu_visibility_unconfigure(cpu_t *cp, zone_t *zone) 2416 { 2417 zoneid_t zoneid = zone ? 
zone->zone_id : ALL_ZONES; 2418 2419 ASSERT(MUTEX_HELD(&cpu_lock)); 2420 ASSERT(pool_pset_enabled()); 2421 ASSERT(cp != NULL); 2422 2423 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2424 ASSERT(zone->zone_ncpus != 0); 2425 zone->zone_ncpus--; 2426 } 2427 if (cp->cpu_info_kstat) 2428 kstat_zone_remove(cp->cpu_info_kstat, zoneid); 2429 } 2430 2431 /* 2432 * Make invisible to the zone that subset of the cpu information that would be 2433 * torn down when a cpu is offlined (but still configured). 2434 */ 2435 void 2436 cpu_visibility_offline(cpu_t *cp, zone_t *zone) 2437 { 2438 kstat_t *ksp; 2439 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */ 2440 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2441 processorid_t cpun; 2442 2443 ASSERT(MUTEX_HELD(&cpu_lock)); 2444 ASSERT(pool_pset_enabled()); 2445 ASSERT(cp != NULL); 2446 ASSERT(cpu_is_active(cp)); 2447 2448 cpun = cp->cpu_id; 2449 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2450 ASSERT(zone->zone_ncpus_online != 0); 2451 zone->zone_ncpus_online--; 2452 } 2453 2454 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) != 2455 NULL) { 2456 kstat_zone_remove(ksp, zoneid); 2457 kstat_rele(ksp); 2458 } 2459 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) { 2460 kstat_zone_remove(ksp, zoneid); 2461 kstat_rele(ksp); 2462 } 2463 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) { 2464 kstat_zone_remove(ksp, zoneid); 2465 kstat_rele(ksp); 2466 } 2467 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun); 2468 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES)) 2469 != NULL) { 2470 kstat_zone_remove(ksp, zoneid); 2471 kstat_rele(ksp); 2472 } 2473 } 2474 2475 /* 2476 * Update relevant kstats such that cpu is no longer visible to processes 2477 * executing in specified zone. 
 */
void
cpu_visibility_remove(cpu_t *cp, zone_t *zone)
{
	if (cpu_is_active(cp))
		cpu_visibility_offline(cp, zone);
	cpu_visibility_unconfigure(cp, zone);
}

/*
 * Bind a thread to a CPU as requested.
 *
 * bind is either a processor id, PBIND_NONE (unbind), or one of the
 * PBIND_QUERY* / PBIND_SOFT / PBIND_HARD control values.  The previous
 * binding (if any) is returned through obind.  Per processor_bind(2)
 * semantics, per-thread failures are reported through *error while the
 * function itself returns 0 so that sibling LWPs still get processed.
 * Caller must hold cpu_lock and the process's p_lock.
 */
int
cpu_bind_thread(kthread_id_t tp, processorid_t bind, processorid_t *obind,
    int *error)
{
	processorid_t	binding;
	cpu_t		*cp = NULL;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));

	thread_lock(tp);

	/*
	 * Record old binding, but change the obind, which was initialized
	 * to PBIND_NONE, only if this thread has a binding.  This avoids
	 * reporting PBIND_NONE for a process when some LWPs are bound.
	 */
	binding = tp->t_bind_cpu;
	if (binding != PBIND_NONE)
		*obind = binding;	/* record old binding */

	switch (bind) {
	case PBIND_QUERY:
		/* Just return the old binding */
		thread_unlock(tp);
		return (0);

	case PBIND_QUERY_TYPE:
		/* Return the binding type */
		*obind = TB_CPU_IS_SOFT(tp) ? PBIND_SOFT : PBIND_HARD;
		thread_unlock(tp);
		return (0);

	case PBIND_SOFT:
		/*
		 *  Set soft binding for this thread and return the actual
		 *  binding
		 */
		TB_CPU_SOFT_SET(tp);
		thread_unlock(tp);
		return (0);

	case PBIND_HARD:
		/*
		 *  Set hard binding for this thread and return the actual
		 *  binding
		 */
		TB_CPU_HARD_SET(tp);
		thread_unlock(tp);
		return (0);

	default:
		break;
	}

	/*
	 * If this thread/LWP cannot be bound because of permission
	 * problems, just note that and return success so that the
	 * other threads/LWPs will be bound.  This is the way
	 * processor_bind() is defined to work.
	 *
	 * Binding will get EPERM if the thread is of system class
	 * or hasprocperm() fails.
	 */
	if (tp->t_cid == 0 || !hasprocperm(tp->t_cred, CRED())) {
		*error = EPERM;
		thread_unlock(tp);
		return (0);
	}

	binding = bind;
	if (binding != PBIND_NONE) {
		cp = cpu_get((processorid_t)binding);
		/*
		 * Make sure binding is valid and is in right partition.
		 */
		if (cp == NULL || tp->t_cpupart != cp->cpu_part) {
			*error = EINVAL;
			thread_unlock(tp);
			return (0);
		}
	}
	tp->t_bind_cpu = binding;	/* set new binding */

	/*
	 * If there is no system-set reason for affinity, set
	 * the t_bound_cpu field to reflect the binding.
	 */
	if (tp->t_affinitycnt == 0) {
		if (binding == PBIND_NONE) {
			/*
			 * We may need to adjust disp_max_unbound_pri
			 * since we're becoming unbound.
			 */
			disp_adjust_unbound_pri(tp);

			tp->t_bound_cpu = NULL;	/* set new binding */

			/*
			 * Move thread to lgroup with strongest affinity
			 * after unbinding
			 */
			if (tp->t_lgrp_affinity)
				lgrp_move_thread(tp,
				    lgrp_choose(tp, tp->t_cpupart), 1);

			if (tp->t_state == TS_ONPROC &&
			    tp->t_cpu->cpu_part != tp->t_cpupart)
				cpu_surrender(tp);
		} else {
			lpl_t	*lpl;

			tp->t_bound_cpu = cp;
			ASSERT(cp->cpu_lpl != NULL);

			/*
			 * Set home to lgroup with most affinity containing CPU
			 * that thread is being bound or minimum bounding
			 * lgroup if no affinities set
			 */
			if (tp->t_lgrp_affinity)
				lpl = lgrp_affinity_best(tp, tp->t_cpupart,
				    LGRP_NONE, B_FALSE);
			else
				lpl = cp->cpu_lpl;

			if (tp->t_lpl != lpl) {
				/* can't grab cpu_lock */
				lgrp_move_thread(tp, lpl, 1);
			}

			/*
			 * Make the thread switch to the bound CPU.
			 * If the thread is runnable, we need to
			 * requeue it even if t_cpu is already set
			 * to the right CPU, since it may be on a
			 * kpreempt queue and need to move to a local
			 * queue.  We could check t_disp_queue to
			 * avoid unnecessary overhead if it's already
			 * on the right queue, but since this isn't
			 * a performance-critical operation it doesn't
			 * seem worth the extra code and complexity.
			 *
			 * If the thread is weakbound to the cpu then it will
			 * resist the new binding request until the weak
			 * binding drops.  The cpu_surrender or requeueing
			 * below could be skipped in such cases (since it
			 * will have no effect), but that would require
			 * thread_allowmigrate to acquire thread_lock so
			 * we'll take the very occasional hit here instead.
			 */
			if (tp->t_state == TS_ONPROC) {
				cpu_surrender(tp);
			} else if (tp->t_state == TS_RUN) {
				cpu_t *ocp = tp->t_cpu;

				(void) dispdeq(tp);
				setbackdq(tp);
				/*
				 * Either on the bound CPU's disp queue now,
				 * or swapped out or on the swap queue.
				 */
				ASSERT(tp->t_disp_queue == cp->cpu_disp ||
				    tp->t_weakbound_cpu == ocp ||
				    (tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ))
				    != TS_LOAD);
			}
		}
	}

	/*
	 * Our binding has changed; set TP_CHANGEBIND.
	 */
	tp->t_proc_flag |= TP_CHANGEBIND;
	aston(tp);

	thread_unlock(tp);

	return (0);
}

#if CPUSET_WORDS > 1

/*
 * Functions for implementing cpuset operations when a cpuset is more
 * than one word.  On platforms where a cpuset is a single word these
 * are implemented as macros in cpuvar.h.
 */

/* Set every CPU bit in the set. */
void
cpuset_all(cpuset_t *s)
{
	int i;

	for (i = 0; i < CPUSET_WORDS; i++)
		s->cpub[i] = ~0UL;
}

/* Set every CPU bit except the one for `cpu'. */
void
cpuset_all_but(cpuset_t *s, uint_t cpu)
{
	cpuset_all(s);
	CPUSET_DEL(*s, cpu);
}

/* Make the set contain exactly `cpu'. */
void
cpuset_only(cpuset_t *s, uint_t cpu)
{
	CPUSET_ZERO(*s);
	CPUSET_ADD(*s, cpu);
}

/* Return 1 if the set is empty, 0 otherwise. */
int
cpuset_isnull(cpuset_t *s)
{
	int i;

	for (i = 0; i < CPUSET_WORDS; i++)
		if (s->cpub[i] != 0)
			return (0);
	return (1);
}

/* Return 1 if the two sets are equal, 0 otherwise. */
int
cpuset_cmp(cpuset_t *s1, cpuset_t *s2)
{
	int i;

	for (i = 0; i < CPUSET_WORDS; i++)
		if (s1->cpub[i] != s2->cpub[i])
			return (0);
	return (1);
}

/*
 * Return the id of the lowest-numbered CPU in the set, or (uint_t)-1 if
 * the set is empty.
 */
uint_t
cpuset_find(cpuset_t *s)
{

	uint_t	i;
	uint_t	cpu = (uint_t)-1;

	/*
	 * Find a cpu in the cpuset
	 */
	for (i = 0; i < CPUSET_WORDS; i++) {
		cpu = (uint_t)(lowbit(s->cpub[i]) - 1);
		if (cpu != (uint_t)-1) {
			cpu += i * BT_NBIPUL;
			break;
		}
	}
	return (cpu);
}

/*
 * Return the smallest and largest CPU ids present in the set through
 * smallestid/largestid; both are set to CPUSET_NOTINSET if the set is
 * empty.
 */
void
cpuset_bounds(cpuset_t *s, uint_t *smallestid, uint_t *largestid)
{
	int	i, j;
	uint_t	bit;

	/*
	 * First, find the smallest cpu id in the set.
	 */
	for (i = 0; i < CPUSET_WORDS; i++) {
		if (s->cpub[i] != 0) {
			bit = (uint_t)(lowbit(s->cpub[i]) - 1);
			ASSERT(bit != (uint_t)-1);
			*smallestid = bit + (i * BT_NBIPUL);

			/*
			 * Now find the largest cpu id in
			 * the set and return immediately.
			 * Done in an inner loop to avoid
			 * having to break out of the first
			 * loop.
			 */
			for (j = CPUSET_WORDS - 1; j >= i; j--) {
				if (s->cpub[j] != 0) {
					bit = (uint_t)(highbit(s->cpub[j]) - 1);
					ASSERT(bit != (uint_t)-1);
					*largestid = bit + (j * BT_NBIPUL);
					ASSERT(*largestid >= *smallestid);
					return;
				}
			}

			/*
			 * If this code is reached, a
			 * smallestid was found, but not a
			 * largestid. The cpuset must have
			 * been changed during the course
			 * of this function call.
			 */
			ASSERT(0);
		}
	}
	*smallestid = *largestid = CPUSET_NOTINSET;
}

#endif	/* CPUSET_WORDS */

/*
 * Unbind threads bound to specified CPU.
 *
 * If `unbind_all_threads' is true, unbind all user threads bound to a given
 * CPU. Otherwise unbind all soft-bound user threads.
 */
int
cpu_unbind(processorid_t cpu, boolean_t unbind_all_threads)
{
	processorid_t obind;
	kthread_t *tp;
	int ret = 0;
	proc_t *pp;
	int err, berr = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	mutex_enter(&pidlock);
	for (pp = practive; pp != NULL; pp = pp->p_next) {
		mutex_enter(&pp->p_lock);
		tp = pp->p_tlist;
		/*
		 * Skip zombies, kernel processes, and processes in
		 * other zones, if called from a non-global zone.
		 */
		if (tp == NULL || (pp->p_flag & SSYS) ||
		    !HASZONEACCESS(curproc, pp->p_zone->zone_id)) {
			mutex_exit(&pp->p_lock);
			continue;
		}
		do {
			if (tp->t_bind_cpu != cpu)
				continue;
			/*
			 * Skip threads with hard binding when
			 * `unbind_all_threads' is not specified.
			 */
			if (!unbind_all_threads && TB_CPU_IS_HARD(tp))
				continue;
			err = cpu_bind_thread(tp, PBIND_NONE, &obind, &berr);
			if (ret == 0)
				ret = err;
		} while ((tp = tp->t_forw) != pp->p_tlist);
		mutex_exit(&pp->p_lock);
	}
	mutex_exit(&pidlock);
	/* report the first per-thread (berr) failure if nothing else failed */
	if (ret == 0)
		ret = berr;
	return (ret);
}


/*
 * Destroy all remaining bound threads on a cpu.
 */
void
cpu_destroy_bound_threads(cpu_t *cp)
{
	extern id_t syscid;
	register kthread_id_t t, tlist, tnext;

	/*
	 * Destroy all remaining bound threads on the cpu.  This
	 * should include both the interrupt threads and the idle thread.
	 * This requires some care, since we need to traverse the
	 * thread list with the pidlock mutex locked, but thread_free
	 * also locks the pidlock mutex.  So, we collect the threads
	 * we're going to reap in a list headed by "tlist", then we
	 * unlock the pidlock mutex and traverse the tlist list,
	 * doing thread_free's on the thread's.  Simple, n'est pas?
	 * Also, this depends on thread_free not mucking with the
	 * t_next and t_prev links of the thread.
	 */

	if ((t = curthread) != NULL) {

		tlist = NULL;
		mutex_enter(&pidlock);
		do {
			tnext = t->t_next;
			if (t->t_bound_cpu == cp) {

				/*
				 * We've found a bound thread, carefully unlink
				 * it out of the thread list, and add it to
				 * our "tlist".  We "know" we don't have to
				 * worry about unlinking curthread (the thread
				 * that is executing this code).
				 */
				t->t_next->t_prev = t->t_prev;
				t->t_prev->t_next = t->t_next;
				t->t_next = tlist;
				tlist = t;
				ASSERT(t->t_cid == syscid);
				/* wake up anyone blocked in thread_join */
				cv_broadcast(&t->t_joincv);
				/*
				 * t_lwp set by interrupt threads and not
				 * cleared.
				 */
				t->t_lwp = NULL;
				/*
				 * Pause and idle threads always have
				 * t_state set to TS_ONPROC.
				 */
				t->t_state = TS_FREE;
				t->t_prev = NULL;	/* Just in case */
			}

		} while ((t = tnext) != curthread);

		mutex_exit(&pidlock);

		mutex_sync();
		for (t = tlist; t != NULL; t = tnext) {
			tnext = t->t_next;
			thread_free(t);
		}
	}
}

/*
 * Update the cpu_supp_freqs of this cpu. This information is returned
 * as part of cpu_info kstats. If the cpu_info_kstat exists already, then
 * maintain the kstat data size.
 */
void
cpu_set_supp_freqs(cpu_t *cp, const char *freqs)
{
	char clkstr[sizeof ("18446744073709551615") + 1];	/* ui64 MAX */
	const char *lfreqs = clkstr;
	boolean_t kstat_exists = B_FALSE;
	kstat_t *ksp;
	size_t len;

	/*
	 * A NULL pointer means we only support one speed.
	 */
	if (freqs == NULL)
		(void) snprintf(clkstr, sizeof (clkstr), "%"PRIu64,
		    cp->cpu_curr_clock);
	else
		lfreqs = freqs;

	/*
	 * Make sure the frequency doesn't change while a snapshot is
	 * going on. Of course, we only need to worry about this if
	 * the kstat exists.
	 */
	if ((ksp = cp->cpu_info_kstat) != NULL) {
		mutex_enter(ksp->ks_lock);
		kstat_exists = B_TRUE;
	}

	/*
	 * Free any previously allocated string and if the kstat
	 * already exists, then update its data size.
	 */
	if (cp->cpu_supp_freqs != NULL) {
		len = strlen(cp->cpu_supp_freqs) + 1;
		kmem_free(cp->cpu_supp_freqs, len);
		if (kstat_exists)
			ksp->ks_data_size -= len;
	}

	/*
	 * Allocate the new string and set the pointer.
	 */
	len = strlen(lfreqs) + 1;
	cp->cpu_supp_freqs = kmem_alloc(len, KM_SLEEP);
	(void) strcpy(cp->cpu_supp_freqs, lfreqs);

	/*
	 * If the kstat already exists then update the data size and
	 * free the lock.
	 */
	if (kstat_exists) {
		ksp->ks_data_size += len;
		mutex_exit(ksp->ks_lock);
	}
}

/*
 * Indicate the current CPU's clock freqency (in Hz).
 * The calling context must be such that CPU references are safe.
 */
void
cpu_set_curr_clock(uint64_t new_clk)
{
	uint64_t old_clk;

	old_clk = CPU->cpu_curr_clock;
	CPU->cpu_curr_clock = new_clk;

	/*
	 * The cpu-change-speed DTrace probe exports the frequency in Hz
	 */
	DTRACE_PROBE3(cpu__change__speed, processorid_t, CPU->cpu_id,
	    uint64_t, old_clk, uint64_t, new_clk);
}

/*
 * processor_info(2) and p_online(2) status support functions
 *   The constants returned by the cpu_get_state() and cpu_get_state_str() are
 *   for use in communicating processor state information to userland.  Kernel
 *   subsystems should only be using the cpu_flags value directly.  Subsystems
 *   modifying cpu_flags should record the state change via a call to the
 *   cpu_set_state().
 */

/*
 * Update the pi_state of this CPU.  This function provides the CPU status for
 * the information returned by processor_info(2).
 */
void
cpu_set_state(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	cpu->cpu_type_info.pi_state = cpu_get_state(cpu);
	cpu->cpu_state_begin = gethrestime_sec();
	/* note the state change for the pools framework */
	pool_cpu_mod = gethrtime();
}

/*
 * Return offline/online/other status for the indicated CPU.  Use only for
 * communication with user applications; cpu_flags provides the in-kernel
 * interface.
3018 */ 3019 int 3020 cpu_get_state(cpu_t *cpu) 3021 { 3022 ASSERT(MUTEX_HELD(&cpu_lock)); 3023 if (cpu->cpu_flags & CPU_POWEROFF) 3024 return (P_POWEROFF); 3025 else if (cpu->cpu_flags & CPU_FAULTED) 3026 return (P_FAULTED); 3027 else if (cpu->cpu_flags & CPU_SPARE) 3028 return (P_SPARE); 3029 else if ((cpu->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY) 3030 return (P_OFFLINE); 3031 else if (cpu->cpu_flags & CPU_ENABLE) 3032 return (P_ONLINE); 3033 else 3034 return (P_NOINTR); 3035 } 3036 3037 /* 3038 * Return processor_info(2) state as a string. 3039 */ 3040 const char * 3041 cpu_get_state_str(cpu_t *cpu) 3042 { 3043 const char *string; 3044 3045 switch (cpu_get_state(cpu)) { 3046 case P_ONLINE: 3047 string = PS_ONLINE; 3048 break; 3049 case P_POWEROFF: 3050 string = PS_POWEROFF; 3051 break; 3052 case P_NOINTR: 3053 string = PS_NOINTR; 3054 break; 3055 case P_SPARE: 3056 string = PS_SPARE; 3057 break; 3058 case P_FAULTED: 3059 string = PS_FAULTED; 3060 break; 3061 case P_OFFLINE: 3062 string = PS_OFFLINE; 3063 break; 3064 default: 3065 string = "unknown"; 3066 break; 3067 } 3068 return (string); 3069 } 3070 3071 /* 3072 * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named 3073 * kstats, respectively. This is done when a CPU is initialized or placed 3074 * online via p_online(2). 
 */
static void
cpu_stats_kstat_create(cpu_t *cp)
{
	int	instance = cp->cpu_id;
	char	*module = "cpu";
	char	*class = "misc";
	kstat_t	*ksp;
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	/*
	 * Create named kstats
	 */
#define	CPU_STATS_KS_CREATE(name, tsize, update_func)                    \
	ksp = kstat_create_zone(module, instance, (name), class,         \
	    KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0,       \
	    zoneid);                                                     \
	if (ksp != NULL) {                                               \
		ksp->ks_private = cp;                                    \
		ksp->ks_update = (update_func);                          \
		kstat_install(ksp);                                      \
	} else                                                           \
		cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat", \
		    module, instance, (name));

	CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template),
	    cpu_sys_stats_ks_update);
	CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template),
	    cpu_vm_stats_ks_update);

	/*
	 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat.
	 */
	ksp = kstat_create_zone("cpu_stat", cp->cpu_id, NULL,
	    "misc", KSTAT_TYPE_RAW, sizeof (cpu_stat_t), 0, zoneid);
	if (ksp != NULL) {
		ksp->ks_update = cpu_stat_ks_update;
		ksp->ks_private = cp;
		kstat_install(ksp);
	}
}

/*
 * Remove the kstats created by cpu_stats_kstat_create() above.
 */
static void
cpu_stats_kstat_destroy(cpu_t *cp)
{
	char ks_name[KSTAT_STRLEN];

	(void) sprintf(ks_name, "cpu_stat%d", cp->cpu_id);
	kstat_delete_byname("cpu_stat", cp->cpu_id, ks_name);

	kstat_delete_byname("cpu", cp->cpu_id, "sys");
	kstat_delete_byname("cpu", cp->cpu_id, "vm");
}

/*
 * kstat update routine for the per-CPU "cpu:sys" named kstat; read-only
 * (writes get EACCES).
 */
static int
cpu_sys_stats_ks_update(kstat_t *ksp, int rw)
{
	cpu_t *cp = (cpu_t *)ksp->ks_private;
	struct cpu_sys_stats_ks_data *csskd;
	cpu_sys_stats_t *css;
	hrtime_t msnsecs[NCMSTATES];
	int	i;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	csskd = ksp->ks_data;
	css = &cp->cpu_stats.sys;

	/*
	 * Read CPU mstate, but compare with the last values we
	 * received to make sure that the returned kstats never
	 * decrease.
	 */

	get_cpu_mstate(cp, msnsecs);
	if (csskd->cpu_nsec_idle.value.ui64 > msnsecs[CMS_IDLE])
		msnsecs[CMS_IDLE] = csskd->cpu_nsec_idle.value.ui64;
	if (csskd->cpu_nsec_user.value.ui64 > msnsecs[CMS_USER])
		msnsecs[CMS_USER] = csskd->cpu_nsec_user.value.ui64;
	if (csskd->cpu_nsec_kernel.value.ui64 > msnsecs[CMS_SYSTEM])
		msnsecs[CMS_SYSTEM] = csskd->cpu_nsec_kernel.value.ui64;

	/* reset names/types from the template, then fill in the values */
	bcopy(&cpu_sys_stats_ks_data_template, ksp->ks_data,
	    sizeof (cpu_sys_stats_ks_data_template));

	csskd->cpu_ticks_wait.value.ui64 = 0;
	csskd->wait_ticks_io.value.ui64 = 0;

	csskd->cpu_nsec_idle.value.ui64 = msnsecs[CMS_IDLE];
	csskd->cpu_nsec_user.value.ui64 = msnsecs[CMS_USER];
	csskd->cpu_nsec_kernel.value.ui64 = msnsecs[CMS_SYSTEM];
	csskd->cpu_ticks_idle.value.ui64 =
	    NSEC_TO_TICK(csskd->cpu_nsec_idle.value.ui64);
	csskd->cpu_ticks_user.value.ui64 =
	    NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64);
	csskd->cpu_ticks_kernel.value.ui64 =
	    NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64);
	csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast;
	csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload;
	csskd->bread.value.ui64 = css->bread;
	csskd->bwrite.value.ui64 = css->bwrite;
	csskd->lread.value.ui64 = css->lread;
	csskd->lwrite.value.ui64 = css->lwrite;
	csskd->phread.value.ui64 = css->phread;
	csskd->phwrite.value.ui64 = css->phwrite;
	csskd->pswitch.value.ui64 = css->pswitch;
	csskd->trap.value.ui64 = css->trap;
	csskd->intr.value.ui64 = 0;
	for (i = 0; i < PIL_MAX; i++)
		csskd->intr.value.ui64 += css->intr[i];
	csskd->syscall.value.ui64 = css->syscall;
	csskd->sysread.value.ui64 = css->sysread;
	csskd->syswrite.value.ui64 = css->syswrite;
	csskd->sysfork.value.ui64 = css->sysfork;
	csskd->sysvfork.value.ui64 = css->sysvfork;
	csskd->sysexec.value.ui64 = css->sysexec;
	csskd->readch.value.ui64 = css->readch;
	csskd->writech.value.ui64 = css->writech;
	csskd->rcvint.value.ui64 = css->rcvint;
	csskd->xmtint.value.ui64 = css->xmtint;
	csskd->mdmint.value.ui64 = css->mdmint;
	csskd->rawch.value.ui64 = css->rawch;
	csskd->canch.value.ui64 = css->canch;
	csskd->outch.value.ui64 = css->outch;
	csskd->msg.value.ui64 = css->msg;
	csskd->sema.value.ui64 = css->sema;
	csskd->namei.value.ui64 = css->namei;
	csskd->ufsiget.value.ui64 = css->ufsiget;
	csskd->ufsdirblk.value.ui64 = css->ufsdirblk;
	csskd->ufsipage.value.ui64 = css->ufsipage;
	csskd->ufsinopage.value.ui64 = css->ufsinopage;
	csskd->procovf.value.ui64 = css->procovf;
	/* interrupts-as-threads only run below LOCK_LEVEL */
	csskd->intrthread.value.ui64 = 0;
	for (i = 0; i < LOCK_LEVEL - 1; i++)
		csskd->intrthread.value.ui64 += css->intr[i];
	csskd->intrblk.value.ui64 = css->intrblk;
	csskd->intrunpin.value.ui64 = css->intrunpin;
	csskd->idlethread.value.ui64 = css->idlethread;
	csskd->inv_swtch.value.ui64 = css->inv_swtch;
	csskd->nthreads.value.ui64 = css->nthreads;
	csskd->cpumigrate.value.ui64 = css->cpumigrate;
	csskd->xcalls.value.ui64 = css->xcalls;
	csskd->mutex_adenters.value.ui64 = css->mutex_adenters;
	csskd->rw_rdfails.value.ui64 = css->rw_rdfails;
	csskd->rw_wrfails.value.ui64 = css->rw_wrfails;
	csskd->modload.value.ui64 = css->modload;
	csskd->modunload.value.ui64 = css->modunload;
	csskd->bawrite.value.ui64 = css->bawrite;
	csskd->iowait.value.ui64 = css->iowait;

	return (0);
}

/*
 * kstat update routine for the per-CPU "cpu:vm" named kstat; read-only
 * (writes get EACCES).
 */
static int
cpu_vm_stats_ks_update(kstat_t *ksp, int rw)
{
	cpu_t *cp = (cpu_t *)ksp->ks_private;
	struct cpu_vm_stats_ks_data *cvskd;
	cpu_vm_stats_t *cvs;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	cvs = &cp->cpu_stats.vm;
	cvskd = ksp->ks_data;

	/* reset names/types from the template, then fill in the values */
	bcopy(&cpu_vm_stats_ks_data_template, ksp->ks_data,
	    sizeof (cpu_vm_stats_ks_data_template));
	cvskd->pgrec.value.ui64 = cvs->pgrec;
	cvskd->pgfrec.value.ui64 = cvs->pgfrec;
	cvskd->pgin.value.ui64 = cvs->pgin;
	cvskd->pgpgin.value.ui64 = cvs->pgpgin;
	cvskd->pgout.value.ui64 = cvs->pgout;
	cvskd->pgpgout.value.ui64 = cvs->pgpgout;
	cvskd->swapin.value.ui64 = cvs->swapin;
	cvskd->pgswapin.value.ui64 = cvs->pgswapin;
	cvskd->swapout.value.ui64 = cvs->swapout;
	cvskd->pgswapout.value.ui64 = cvs->pgswapout;
	cvskd->zfod.value.ui64 = cvs->zfod;
	cvskd->dfree.value.ui64 = cvs->dfree;
	cvskd->scan.value.ui64 = cvs->scan;
	cvskd->rev.value.ui64 = cvs->rev;
	cvskd->hat_fault.value.ui64 = cvs->hat_fault;
	cvskd->as_fault.value.ui64 = cvs->as_fault;
	cvskd->maj_fault.value.ui64 = cvs->maj_fault;
	cvskd->cow_fault.value.ui64 = cvs->cow_fault;
	cvskd->prot_fault.value.ui64 = cvs->prot_fault;
	cvskd->softlock.value.ui64 = cvs->softlock;
	cvskd->kernel_asflt.value.ui64 = cvs->kernel_asflt;
	cvskd->pgrrun.value.ui64 = cvs->pgrrun;
	cvskd->execpgin.value.ui64 = cvs->execpgin;
	cvskd->execpgout.value.ui64 = cvs->execpgout;
	cvskd->execfree.value.ui64 = cvs->execfree;
	cvskd->anonpgin.value.ui64 = cvs->anonpgin;
	cvskd->anonpgout.value.ui64 = cvs->anonpgout;
	cvskd->anonfree.value.ui64 = cvs->anonfree;
	cvskd->fspgin.value.ui64 = cvs->fspgin;
	cvskd->fspgout.value.ui64 = cvs->fspgout;
	cvskd->fsfree.value.ui64 = cvs->fsfree;

	return (0);
}

/*
 * kstat update routine for the legacy raw "cpu_stat" kstat; read-only
 * (writes get EACCES).
 */
static int
cpu_stat_ks_update(kstat_t *ksp, int rw)
{
	cpu_stat_t *cso;
	cpu_t *cp;
	int i;
	hrtime_t msnsecs[NCMSTATES];

	cso = (cpu_stat_t *)ksp->ks_data;
	cp = (cpu_t *)ksp->ks_private;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	/*
	 * Read CPU mstate, but compare with the last values we
	 * received to make sure that the returned kstats never
	 * decrease.
	 */

	get_cpu_mstate(cp, msnsecs);
	msnsecs[CMS_IDLE] = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
	msnsecs[CMS_USER] = NSEC_TO_TICK(msnsecs[CMS_USER]);
	msnsecs[CMS_SYSTEM] = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
	if (cso->cpu_sysinfo.cpu[CPU_IDLE] < msnsecs[CMS_IDLE])
		cso->cpu_sysinfo.cpu[CPU_IDLE] = msnsecs[CMS_IDLE];
	if (cso->cpu_sysinfo.cpu[CPU_USER] < msnsecs[CMS_USER])
		cso->cpu_sysinfo.cpu[CPU_USER] = msnsecs[CMS_USER];
	if (cso->cpu_sysinfo.cpu[CPU_KERNEL] < msnsecs[CMS_SYSTEM])
		cso->cpu_sysinfo.cpu[CPU_KERNEL] = msnsecs[CMS_SYSTEM];
	cso->cpu_sysinfo.cpu[CPU_WAIT] = 0;
	cso->cpu_sysinfo.wait[W_IO] = 0;
	cso->cpu_sysinfo.wait[W_SWAP] = 0;
	cso->cpu_sysinfo.wait[W_PIO] = 0;
	cso->cpu_sysinfo.bread = CPU_STATS(cp, sys.bread);
	cso->cpu_sysinfo.bwrite = CPU_STATS(cp, sys.bwrite);
	cso->cpu_sysinfo.lread = CPU_STATS(cp, sys.lread);
	cso->cpu_sysinfo.lwrite = CPU_STATS(cp, sys.lwrite);
	cso->cpu_sysinfo.phread = CPU_STATS(cp, sys.phread);
	cso->cpu_sysinfo.phwrite = CPU_STATS(cp, sys.phwrite);
	cso->cpu_sysinfo.pswitch = CPU_STATS(cp, sys.pswitch);
	cso->cpu_sysinfo.trap = CPU_STATS(cp, sys.trap);
	cso->cpu_sysinfo.intr = 0;
	for (i = 0; i < PIL_MAX; i++)
		cso->cpu_sysinfo.intr += CPU_STATS(cp, sys.intr[i]);
	cso->cpu_sysinfo.syscall = CPU_STATS(cp, sys.syscall);
	cso->cpu_sysinfo.sysread = CPU_STATS(cp, sys.sysread);
	cso->cpu_sysinfo.syswrite = CPU_STATS(cp, sys.syswrite);
	cso->cpu_sysinfo.sysfork = CPU_STATS(cp, sys.sysfork);
	cso->cpu_sysinfo.sysvfork = CPU_STATS(cp, sys.sysvfork);
	cso->cpu_sysinfo.sysexec = CPU_STATS(cp, sys.sysexec);
	cso->cpu_sysinfo.readch = CPU_STATS(cp, sys.readch);
	cso->cpu_sysinfo.writech = CPU_STATS(cp, sys.writech);
	cso->cpu_sysinfo.rcvint = CPU_STATS(cp, sys.rcvint);
	cso->cpu_sysinfo.xmtint = CPU_STATS(cp, sys.xmtint);
	cso->cpu_sysinfo.mdmint = CPU_STATS(cp, sys.mdmint);
	cso->cpu_sysinfo.rawch = CPU_STATS(cp, sys.rawch);
	cso->cpu_sysinfo.canch = CPU_STATS(cp, sys.canch);
	cso->cpu_sysinfo.outch = CPU_STATS(cp, sys.outch);
	cso->cpu_sysinfo.msg = CPU_STATS(cp, sys.msg);
	cso->cpu_sysinfo.sema = CPU_STATS(cp, sys.sema);
	cso->cpu_sysinfo.namei = CPU_STATS(cp, sys.namei);
	cso->cpu_sysinfo.ufsiget = CPU_STATS(cp, sys.ufsiget);
	cso->cpu_sysinfo.ufsdirblk = CPU_STATS(cp, sys.ufsdirblk);
	cso->cpu_sysinfo.ufsipage = CPU_STATS(cp, sys.ufsipage);
	cso->cpu_sysinfo.ufsinopage = CPU_STATS(cp, sys.ufsinopage);
	cso->cpu_sysinfo.inodeovf = 0;
	cso->cpu_sysinfo.fileovf = 0;
	cso->cpu_sysinfo.procovf = CPU_STATS(cp, sys.procovf);
	cso->cpu_sysinfo.intrthread = 0;
	for (i = 0; i < LOCK_LEVEL - 1; i++)
		cso->cpu_sysinfo.intrthread += CPU_STATS(cp, sys.intr[i]);
	cso->cpu_sysinfo.intrblk = CPU_STATS(cp, sys.intrblk);
	cso->cpu_sysinfo.idlethread = CPU_STATS(cp, sys.idlethread);
	cso->cpu_sysinfo.inv_swtch = CPU_STATS(cp, sys.inv_swtch);
	cso->cpu_sysinfo.nthreads = CPU_STATS(cp, sys.nthreads);
	cso->cpu_sysinfo.cpumigrate = CPU_STATS(cp, sys.cpumigrate);
	cso->cpu_sysinfo.xcalls = CPU_STATS(cp, sys.xcalls);
	cso->cpu_sysinfo.mutex_adenters = CPU_STATS(cp, sys.mutex_adenters);
	cso->cpu_sysinfo.rw_rdfails = CPU_STATS(cp, sys.rw_rdfails);
	cso->cpu_sysinfo.rw_wrfails = CPU_STATS(cp, sys.rw_wrfails);
	cso->cpu_sysinfo.modload = CPU_STATS(cp, sys.modload);
	cso->cpu_sysinfo.modunload = CPU_STATS(cp, sys.modunload);
	cso->cpu_sysinfo.bawrite = CPU_STATS(cp, sys.bawrite);
	cso->cpu_sysinfo.rw_enters = 0;
	cso->cpu_sysinfo.win_uo_cnt = 0;
	cso->cpu_sysinfo.win_uu_cnt = 0;
	cso->cpu_sysinfo.win_so_cnt = 0;
	cso->cpu_sysinfo.win_su_cnt = 0;
	cso->cpu_sysinfo.win_suo_cnt = 0;

	cso->cpu_syswait.iowait = CPU_STATS(cp, sys.iowait);
	cso->cpu_syswait.swap = 0;
	cso->cpu_syswait.physio = 0;

cso->cpu_vminfo.pgrec = CPU_STATS(cp, vm.pgrec); 3381 cso->cpu_vminfo.pgfrec = CPU_STATS(cp, vm.pgfrec); 3382 cso->cpu_vminfo.pgin = CPU_STATS(cp, vm.pgin); 3383 cso->cpu_vminfo.pgpgin = CPU_STATS(cp, vm.pgpgin); 3384 cso->cpu_vminfo.pgout = CPU_STATS(cp, vm.pgout); 3385 cso->cpu_vminfo.pgpgout = CPU_STATS(cp, vm.pgpgout); 3386 cso->cpu_vminfo.swapin = CPU_STATS(cp, vm.swapin); 3387 cso->cpu_vminfo.pgswapin = CPU_STATS(cp, vm.pgswapin); 3388 cso->cpu_vminfo.swapout = CPU_STATS(cp, vm.swapout); 3389 cso->cpu_vminfo.pgswapout = CPU_STATS(cp, vm.pgswapout); 3390 cso->cpu_vminfo.zfod = CPU_STATS(cp, vm.zfod); 3391 cso->cpu_vminfo.dfree = CPU_STATS(cp, vm.dfree); 3392 cso->cpu_vminfo.scan = CPU_STATS(cp, vm.scan); 3393 cso->cpu_vminfo.rev = CPU_STATS(cp, vm.rev); 3394 cso->cpu_vminfo.hat_fault = CPU_STATS(cp, vm.hat_fault); 3395 cso->cpu_vminfo.as_fault = CPU_STATS(cp, vm.as_fault); 3396 cso->cpu_vminfo.maj_fault = CPU_STATS(cp, vm.maj_fault); 3397 cso->cpu_vminfo.cow_fault = CPU_STATS(cp, vm.cow_fault); 3398 cso->cpu_vminfo.prot_fault = CPU_STATS(cp, vm.prot_fault); 3399 cso->cpu_vminfo.softlock = CPU_STATS(cp, vm.softlock); 3400 cso->cpu_vminfo.kernel_asflt = CPU_STATS(cp, vm.kernel_asflt); 3401 cso->cpu_vminfo.pgrrun = CPU_STATS(cp, vm.pgrrun); 3402 cso->cpu_vminfo.execpgin = CPU_STATS(cp, vm.execpgin); 3403 cso->cpu_vminfo.execpgout = CPU_STATS(cp, vm.execpgout); 3404 cso->cpu_vminfo.execfree = CPU_STATS(cp, vm.execfree); 3405 cso->cpu_vminfo.anonpgin = CPU_STATS(cp, vm.anonpgin); 3406 cso->cpu_vminfo.anonpgout = CPU_STATS(cp, vm.anonpgout); 3407 cso->cpu_vminfo.anonfree = CPU_STATS(cp, vm.anonfree); 3408 cso->cpu_vminfo.fspgin = CPU_STATS(cp, vm.fspgin); 3409 cso->cpu_vminfo.fspgout = CPU_STATS(cp, vm.fspgout); 3410 cso->cpu_vminfo.fsfree = CPU_STATS(cp, vm.fsfree); 3411 3412 return (0); 3413 } 3414