1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * Architecture-independent CPU control functions. 
28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/var.h> 33 #include <sys/thread.h> 34 #include <sys/cpuvar.h> 35 #include <sys/kstat.h> 36 #include <sys/uadmin.h> 37 #include <sys/systm.h> 38 #include <sys/errno.h> 39 #include <sys/cmn_err.h> 40 #include <sys/procset.h> 41 #include <sys/processor.h> 42 #include <sys/debug.h> 43 #include <sys/cpupart.h> 44 #include <sys/lgrp.h> 45 #include <sys/pset.h> 46 #include <sys/pghw.h> 47 #include <sys/kmem.h> 48 #include <sys/kmem_impl.h> /* to set per-cpu kmem_cache offset */ 49 #include <sys/atomic.h> 50 #include <sys/callb.h> 51 #include <sys/vtrace.h> 52 #include <sys/cyclic.h> 53 #include <sys/bitmap.h> 54 #include <sys/nvpair.h> 55 #include <sys/pool_pset.h> 56 #include <sys/msacct.h> 57 #include <sys/time.h> 58 #include <sys/archsystm.h> 59 #include <sys/sdt.h> 60 #if defined(__x86) || defined(__amd64) 61 #include <sys/x86_archext.h> 62 #endif 63 #include <sys/callo.h> 64 65 extern int mp_cpu_start(cpu_t *); 66 extern int mp_cpu_stop(cpu_t *); 67 extern int mp_cpu_poweron(cpu_t *); 68 extern int mp_cpu_poweroff(cpu_t *); 69 extern int mp_cpu_configure(int); 70 extern int mp_cpu_unconfigure(int); 71 extern void mp_cpu_faulted_enter(cpu_t *); 72 extern void mp_cpu_faulted_exit(cpu_t *); 73 74 extern int cmp_cpu_to_chip(processorid_t cpuid); 75 #ifdef __sparcv9 76 extern char *cpu_fru_fmri(cpu_t *cp); 77 #endif 78 79 static void cpu_add_active_internal(cpu_t *cp); 80 static void cpu_remove_active(cpu_t *cp); 81 static void cpu_info_kstat_create(cpu_t *cp); 82 static void cpu_info_kstat_destroy(cpu_t *cp); 83 static void cpu_stats_kstat_create(cpu_t *cp); 84 static void cpu_stats_kstat_destroy(cpu_t *cp); 85 86 static int cpu_sys_stats_ks_update(kstat_t *ksp, int rw); 87 static int cpu_vm_stats_ks_update(kstat_t *ksp, int rw); 88 static int cpu_stat_ks_update(kstat_t *ksp, int rw); 89 static int cpu_state_change_hooks(int, cpu_setup_t, cpu_setup_t); 90 91 /* 92 * cpu_lock protects ncpus, 
ncpus_online, cpu_flag, cpu_list, cpu_active,
 * and dispatch queue reallocations.  The lock ordering with respect to
 * related locks is:
 *
 *	cpu_lock --> thread_free_lock  --->  p_lock  --->  thread_lock()
 *
 * Warning:  Certain sections of code do not use the cpu_lock when
 * traversing the cpu_list (e.g. mutex_vector_enter(), clock()).  Since
 * all cpus are paused during modifications to this list, a solution
 * to protect the list is to either disable kernel preemption while
 * walking the list, *or* recheck the cpu_next pointer at each
 * iteration in the loop.  Note that in no cases can any cached
 * copies of the cpu pointers be kept as they may become invalid.
 */
kmutex_t	cpu_lock;
cpu_t		*cpu_list;		/* list of all CPUs */
cpu_t		*clock_cpu_list;	/* used by clock to walk CPUs */
cpu_t		*cpu_active;		/* list of active CPUs */
static cpuset_t	cpu_available;		/* set of available CPUs */
cpuset_t	cpu_seqid_inuse;	/* which cpu_seqids are in use */

cpu_t		**cpu_seq;		/* ptrs to CPUs, indexed by seq_id */

/*
 * max_ncpus keeps the max cpus the system can have. Initially
 * it's NCPU, but since most archs scan the devtree for cpus
 * fairly early on during boot, the real max can be known before
 * ncpus is set (useful for early NCPU based allocations).
 */
int max_ncpus = NCPU;
/*
 * Platforms that set max_ncpus to the maximum number of cpus that can be
 * dynamically added will set boot_max_ncpus to the number of cpus found
 * at device tree scan time during boot.  Both boot_* values start at -1.
 * NOTE(review): -1 presumably means "not yet set by the platform", and
 * boot_ncpus has no description in this file - confirm against its users.
 */
int boot_max_ncpus = -1;
int boot_ncpus = -1;
/*
 * Maximum possible CPU id.  This can never be >= NCPU since NCPU is
 * used to size arrays that are indexed by CPU id.
 */
processorid_t max_cpuid = NCPU - 1;

/*
 * CPU counts; both start at 1.  Per the locking comment above, ncpus and
 * ncpus_online are protected by cpu_lock.
 */
int ncpus = 1;
int ncpus_online = 1;

/*
 * CPU that we're trying to offline.  Protected by cpu_lock.
*/
cpu_t *cpu_inmotion;

/*
 * Can be raised to suppress further weakbinding, which are instead
 * satisfied by disabling preemption.  Must be raised/lowered under cpu_lock,
 * while individual thread weakbinding synchronisation is done under thread
 * lock.
 */
int weakbindingbarrier;

/*
 * Variables used in pause_cpus().
 */

/*
 * Per-CPU pause state, indexed by CPU id.  Written by the pausing thread
 * and by each CPU's pause thread, hence volatile.
 */
static volatile char safe_list[NCPU];

static struct _cpu_pause_info {
	int		cp_spl;		/* spl saved in pause_cpus() */
	volatile int	cp_go;		/* Go signal sent after all ready */
	int		cp_count;	/* # of CPUs to pause */
	ksema_t		cp_sem;		/* synch pause_cpus & cpu_pause */
	kthread_id_t	cp_paused;	/* thread that has the CPUs paused */
} cpu_pause_info;

/*
 * Used by cpu_pause_free() to wait for a dying pause thread to mark its
 * safe_list slot PAUSE_DEAD.
 */
static kmutex_t pause_free_mutex;
static kcondvar_t pause_free_cv;

/*
 * Optional hook run by each pause thread (see cpu_pause()); NULL when unset.
 */
void *(*cpu_pause_func)(void *) = NULL;


/*
 * Template for the named "sys" statistics of a CPU.  Every entry pairs a
 * statistic name with the KSTAT_DATA_UINT64 data type; the initializer
 * order must match the member order of the structure.
 * NOTE(review): presumably copied into place by cpu_sys_stats_ks_update(),
 * which is not visible in this part of the file - confirm.
 */
static struct cpu_sys_stats_ks_data {
	kstat_named_t cpu_ticks_idle;
	kstat_named_t cpu_ticks_user;
	kstat_named_t cpu_ticks_kernel;
	kstat_named_t cpu_ticks_wait;
	kstat_named_t cpu_nsec_idle;
	kstat_named_t cpu_nsec_user;
	kstat_named_t cpu_nsec_kernel;
	kstat_named_t cpu_nsec_intr;
	kstat_named_t cpu_load_intr;
	kstat_named_t wait_ticks_io;
	kstat_named_t bread;
	kstat_named_t bwrite;
	kstat_named_t lread;
	kstat_named_t lwrite;
	kstat_named_t phread;
	kstat_named_t phwrite;
	kstat_named_t pswitch;
	kstat_named_t trap;
	kstat_named_t intr;
	kstat_named_t syscall;
	kstat_named_t sysread;
	kstat_named_t syswrite;
	kstat_named_t sysfork;
	kstat_named_t sysvfork;
	kstat_named_t sysexec;
	kstat_named_t readch;
	kstat_named_t writech;
	kstat_named_t rcvint;
	kstat_named_t xmtint;
	kstat_named_t mdmint;
	kstat_named_t rawch;
	kstat_named_t canch;
	kstat_named_t outch;
	kstat_named_t msg;
	kstat_named_t sema;
	kstat_named_t namei;
	kstat_named_t ufsiget;
	kstat_named_t ufsdirblk;
	kstat_named_t ufsipage;
	kstat_named_t ufsinopage;
	kstat_named_t procovf;
	kstat_named_t intrthread;
	kstat_named_t intrblk;
	kstat_named_t intrunpin;
	kstat_named_t idlethread;
	kstat_named_t inv_swtch;
	kstat_named_t nthreads;
	kstat_named_t cpumigrate;
	kstat_named_t xcalls;
	kstat_named_t mutex_adenters;
	kstat_named_t rw_rdfails;
	kstat_named_t rw_wrfails;
	kstat_named_t modload;
	kstat_named_t modunload;
	kstat_named_t bawrite;
	kstat_named_t iowait;
} cpu_sys_stats_ks_data_template = {
	{ "cpu_ticks_idle",	KSTAT_DATA_UINT64 },
	{ "cpu_ticks_user",	KSTAT_DATA_UINT64 },
	{ "cpu_ticks_kernel",	KSTAT_DATA_UINT64 },
	{ "cpu_ticks_wait",	KSTAT_DATA_UINT64 },
	{ "cpu_nsec_idle",	KSTAT_DATA_UINT64 },
	{ "cpu_nsec_user",	KSTAT_DATA_UINT64 },
	{ "cpu_nsec_kernel",	KSTAT_DATA_UINT64 },
	{ "cpu_nsec_intr",	KSTAT_DATA_UINT64 },
	{ "cpu_load_intr",	KSTAT_DATA_UINT64 },
	{ "wait_ticks_io",	KSTAT_DATA_UINT64 },
	{ "bread",		KSTAT_DATA_UINT64 },
	{ "bwrite",		KSTAT_DATA_UINT64 },
	{ "lread",		KSTAT_DATA_UINT64 },
	{ "lwrite",		KSTAT_DATA_UINT64 },
	{ "phread",		KSTAT_DATA_UINT64 },
	{ "phwrite",		KSTAT_DATA_UINT64 },
	{ "pswitch",		KSTAT_DATA_UINT64 },
	{ "trap",		KSTAT_DATA_UINT64 },
	{ "intr",		KSTAT_DATA_UINT64 },
	{ "syscall",		KSTAT_DATA_UINT64 },
	{ "sysread",		KSTAT_DATA_UINT64 },
	{ "syswrite",		KSTAT_DATA_UINT64 },
	{ "sysfork",		KSTAT_DATA_UINT64 },
	{ "sysvfork",		KSTAT_DATA_UINT64 },
	{ "sysexec",		KSTAT_DATA_UINT64 },
	{ "readch",		KSTAT_DATA_UINT64 },
	{ "writech",		KSTAT_DATA_UINT64 },
	{ "rcvint",		KSTAT_DATA_UINT64 },
	{ "xmtint",		KSTAT_DATA_UINT64 },
	{ "mdmint",		KSTAT_DATA_UINT64 },
	{ "rawch",		KSTAT_DATA_UINT64 },
	{ "canch",		KSTAT_DATA_UINT64 },
	{ "outch",		KSTAT_DATA_UINT64 },
	{ "msg",		KSTAT_DATA_UINT64 },
	{ "sema",		KSTAT_DATA_UINT64 },
	{ "namei",		KSTAT_DATA_UINT64 },
	{ "ufsiget",		KSTAT_DATA_UINT64 },
	{ "ufsdirblk",		KSTAT_DATA_UINT64 },
	{ "ufsipage",		KSTAT_DATA_UINT64 },
	{ "ufsinopage",		KSTAT_DATA_UINT64 },
	{ "procovf",		KSTAT_DATA_UINT64 },
	{ "intrthread",		KSTAT_DATA_UINT64 },
	{ "intrblk",		KSTAT_DATA_UINT64 },
	{ "intrunpin",		KSTAT_DATA_UINT64 },
	{ "idlethread",		KSTAT_DATA_UINT64 },
	{ "inv_swtch",		KSTAT_DATA_UINT64 },
	{ "nthreads",		KSTAT_DATA_UINT64 },
	{ "cpumigrate",		KSTAT_DATA_UINT64 },
	{ "xcalls",		KSTAT_DATA_UINT64 },
	{ "mutex_adenters",	KSTAT_DATA_UINT64 },
	{ "rw_rdfails",		KSTAT_DATA_UINT64 },
	{ "rw_wrfails",		KSTAT_DATA_UINT64 },
	{ "modload",		KSTAT_DATA_UINT64 },
	{ "modunload",		KSTAT_DATA_UINT64 },
	{ "bawrite",		KSTAT_DATA_UINT64 },
	{ "iowait",		KSTAT_DATA_UINT64 },
};

/*
 * Template for the named "vm" statistics of a CPU.  Same layout rules as
 * the sys template above: one { name, KSTAT_DATA_UINT64 } initializer per
 * member, in member order.
 */
static struct cpu_vm_stats_ks_data {
	kstat_named_t pgrec;
	kstat_named_t pgfrec;
	kstat_named_t pgin;
	kstat_named_t pgpgin;
	kstat_named_t pgout;
	kstat_named_t pgpgout;
	kstat_named_t swapin;
	kstat_named_t pgswapin;
	kstat_named_t swapout;
	kstat_named_t pgswapout;
	kstat_named_t zfod;
	kstat_named_t dfree;
	kstat_named_t scan;
	kstat_named_t rev;
	kstat_named_t hat_fault;
	kstat_named_t as_fault;
	kstat_named_t maj_fault;
	kstat_named_t cow_fault;
	kstat_named_t prot_fault;
	kstat_named_t softlock;
	kstat_named_t kernel_asflt;
	kstat_named_t pgrrun;
	kstat_named_t execpgin;
	kstat_named_t execpgout;
	kstat_named_t execfree;
	kstat_named_t anonpgin;
	kstat_named_t anonpgout;
	kstat_named_t anonfree;
	kstat_named_t fspgin;
	kstat_named_t fspgout;
	kstat_named_t fsfree;
} cpu_vm_stats_ks_data_template = {
	{ "pgrec",		KSTAT_DATA_UINT64 },
	{ "pgfrec",		KSTAT_DATA_UINT64 },
	{ "pgin",		KSTAT_DATA_UINT64 },
	{ "pgpgin",		KSTAT_DATA_UINT64 },
	{ "pgout",		KSTAT_DATA_UINT64 },
	{ "pgpgout",		KSTAT_DATA_UINT64 },
	{ "swapin",		KSTAT_DATA_UINT64 },
	{ "pgswapin",		KSTAT_DATA_UINT64 },
	{ "swapout",		KSTAT_DATA_UINT64 },
	{ "pgswapout",		KSTAT_DATA_UINT64 },
	{ "zfod",		KSTAT_DATA_UINT64 },
	{ "dfree",		KSTAT_DATA_UINT64 },
	{ "scan",		KSTAT_DATA_UINT64 },
	{ "rev",		KSTAT_DATA_UINT64 },
	{ "hat_fault",		KSTAT_DATA_UINT64 },
	{ "as_fault",		KSTAT_DATA_UINT64 },
	{ "maj_fault",		KSTAT_DATA_UINT64 },
	{ "cow_fault",		KSTAT_DATA_UINT64 },
	{ "prot_fault",		KSTAT_DATA_UINT64 },
	{ "softlock",		KSTAT_DATA_UINT64 },
	{ "kernel_asflt",	KSTAT_DATA_UINT64 },
	{ "pgrrun",		KSTAT_DATA_UINT64 },
	{ "execpgin",		KSTAT_DATA_UINT64 },
	{ "execpgout",		KSTAT_DATA_UINT64 },
	{ "execfree",		KSTAT_DATA_UINT64 },
	{ "anonpgin",		KSTAT_DATA_UINT64 },
	{ "anonpgout",		KSTAT_DATA_UINT64 },
	{ "anonfree",		KSTAT_DATA_UINT64 },
	{ "fspgin",		KSTAT_DATA_UINT64 },
	{ "fspgout",		KSTAT_DATA_UINT64 },
	{ "fsfree",		KSTAT_DATA_UINT64 },
};

/*
 * Force the specified thread to migrate to the appropriate processor.
 * Called with thread lock held, returns with it dropped.
 *
 * Three cases are handled:
 *   - tp is the calling thread: make it runnable again via CL_SETRUN(),
 *     drop the lock without allowing preemption and switch away.
 *   - tp is running on a CPU (TS_ONPROC): ask that CPU to surrender it.
 *   - tp is on a run queue (TS_RUN): dequeue it and requeue it with
 *     setbackdq().
 * A thread in any other state is left alone; only the lock is dropped.
 */
static void
force_thread_migrate(kthread_id_t tp)
{
	ASSERT(THREAD_LOCK_HELD(tp));
	if (tp == curthread) {
		THREAD_TRANSITION(tp);
		CL_SETRUN(tp);
		thread_unlock_nopreempt(tp);
		swtch();
	} else {
		if (tp->t_state == TS_ONPROC) {
			cpu_surrender(tp);
		} else if (tp->t_state == TS_RUN) {
			(void) dispdeq(tp);
			setbackdq(tp);
		}
		thread_unlock(tp);
	}
}

/*
 * Set affinity for a specified CPU.
 * A reference count is incremented and the affinity is held until the
 * reference count is decremented to zero by thread_affinity_clear().
 * This is so regions of code requiring affinity can be nested.
 * Caller needs to ensure that cpu_id remains valid, which can be
 * done by holding cpu_lock across this call, unless the caller
 * specifies CPU_CURRENT in which case the cpu_lock will be acquired
 * by thread_affinity_set and CPU->cpu_id will be the target CPU.
385 */ 386 void 387 thread_affinity_set(kthread_id_t t, int cpu_id) 388 { 389 cpu_t *cp; 390 int c; 391 392 ASSERT(!(t == curthread && t->t_weakbound_cpu != NULL)); 393 394 if ((c = cpu_id) == CPU_CURRENT) { 395 mutex_enter(&cpu_lock); 396 cpu_id = CPU->cpu_id; 397 } 398 /* 399 * We should be asserting that cpu_lock is held here, but 400 * the NCA code doesn't acquire it. The following assert 401 * should be uncommented when the NCA code is fixed. 402 * 403 * ASSERT(MUTEX_HELD(&cpu_lock)); 404 */ 405 ASSERT((cpu_id >= 0) && (cpu_id < NCPU)); 406 cp = cpu[cpu_id]; 407 ASSERT(cp != NULL); /* user must provide a good cpu_id */ 408 /* 409 * If there is already a hard affinity requested, and this affinity 410 * conflicts with that, panic. 411 */ 412 thread_lock(t); 413 if (t->t_affinitycnt > 0 && t->t_bound_cpu != cp) { 414 panic("affinity_set: setting %p but already bound to %p", 415 (void *)cp, (void *)t->t_bound_cpu); 416 } 417 t->t_affinitycnt++; 418 t->t_bound_cpu = cp; 419 420 /* 421 * Make sure we're running on the right CPU. 422 */ 423 if (cp != t->t_cpu || t != curthread) { 424 force_thread_migrate(t); /* drops thread lock */ 425 } else { 426 thread_unlock(t); 427 } 428 429 if (c == CPU_CURRENT) 430 mutex_exit(&cpu_lock); 431 } 432 433 /* 434 * Wrapper for backward compatibility. 435 */ 436 void 437 affinity_set(int cpu_id) 438 { 439 thread_affinity_set(curthread, cpu_id); 440 } 441 442 /* 443 * Decrement the affinity reservation count and if it becomes zero, 444 * clear the CPU affinity for the current thread, or set it to the user's 445 * software binding request. 446 */ 447 void 448 thread_affinity_clear(kthread_id_t t) 449 { 450 register processorid_t binding; 451 452 thread_lock(t); 453 if (--t->t_affinitycnt == 0) { 454 if ((binding = t->t_bind_cpu) == PBIND_NONE) { 455 /* 456 * Adjust disp_max_unbound_pri if necessary. 
457 */ 458 disp_adjust_unbound_pri(t); 459 t->t_bound_cpu = NULL; 460 if (t->t_cpu->cpu_part != t->t_cpupart) { 461 force_thread_migrate(t); 462 return; 463 } 464 } else { 465 t->t_bound_cpu = cpu[binding]; 466 /* 467 * Make sure the thread is running on the bound CPU. 468 */ 469 if (t->t_cpu != t->t_bound_cpu) { 470 force_thread_migrate(t); 471 return; /* already dropped lock */ 472 } 473 } 474 } 475 thread_unlock(t); 476 } 477 478 /* 479 * Wrapper for backward compatibility. 480 */ 481 void 482 affinity_clear(void) 483 { 484 thread_affinity_clear(curthread); 485 } 486 487 /* 488 * Weak cpu affinity. Bind to the "current" cpu for short periods 489 * of time during which the thread must not block (but may be preempted). 490 * Use this instead of kpreempt_disable() when it is only "no migration" 491 * rather than "no preemption" semantics that are required - disabling 492 * preemption holds higher priority threads off of cpu and if the 493 * operation that is protected is more than momentary this is not good 494 * for realtime etc. 495 * 496 * Weakly bound threads will not prevent a cpu from being offlined - 497 * we'll only run them on the cpu to which they are weakly bound but 498 * (because they do not block) we'll always be able to move them on to 499 * another cpu at offline time if we give them just a short moment to 500 * run during which they will unbind. To give a cpu a chance of offlining, 501 * however, we require a barrier to weak bindings that may be raised for a 502 * given cpu (offline/move code may set this and then wait a short time for 503 * existing weak bindings to drop); the cpu_inmotion pointer is that barrier. 504 * 505 * There are few restrictions on the calling context of thread_nomigrate. 506 * The caller must not hold the thread lock. Calls may be nested. 507 * 508 * After weakbinding a thread must not perform actions that may block. 
509 * In particular it must not call thread_affinity_set; calling that when 510 * already weakbound is nonsensical anyway. 511 * 512 * If curthread is prevented from migrating for other reasons 513 * (kernel preemption disabled; high pil; strongly bound; interrupt thread) 514 * then the weak binding will succeed even if this cpu is the target of an 515 * offline/move request. 516 */ 517 void 518 thread_nomigrate(void) 519 { 520 cpu_t *cp; 521 kthread_id_t t = curthread; 522 523 again: 524 kpreempt_disable(); 525 cp = CPU; 526 527 /* 528 * A highlevel interrupt must not modify t_nomigrate or 529 * t_weakbound_cpu of the thread it has interrupted. A lowlevel 530 * interrupt thread cannot migrate and we can avoid the 531 * thread_lock call below by short-circuiting here. In either 532 * case we can just return since no migration is possible and 533 * the condition will persist (ie, when we test for these again 534 * in thread_allowmigrate they can't have changed). Migration 535 * is also impossible if we're at or above DISP_LEVEL pil. 536 */ 537 if (CPU_ON_INTR(cp) || t->t_flag & T_INTR_THREAD || 538 getpil() >= DISP_LEVEL) { 539 kpreempt_enable(); 540 return; 541 } 542 543 /* 544 * We must be consistent with existing weak bindings. Since we 545 * may be interrupted between the increment of t_nomigrate and 546 * the store to t_weakbound_cpu below we cannot assume that 547 * t_weakbound_cpu will be set if t_nomigrate is. Note that we 548 * cannot assert t_weakbound_cpu == t_bind_cpu since that is not 549 * always the case. 550 */ 551 if (t->t_nomigrate && t->t_weakbound_cpu && t->t_weakbound_cpu != cp) { 552 if (!panicstr) 553 panic("thread_nomigrate: binding to %p but already " 554 "bound to %p", (void *)cp, 555 (void *)t->t_weakbound_cpu); 556 } 557 558 /* 559 * At this point we have preemption disabled and we don't yet hold 560 * the thread lock. 
So it's possible that somebody else could 561 * set t_bind_cpu here and not be able to force us across to the 562 * new cpu (since we have preemption disabled). 563 */ 564 thread_lock(curthread); 565 566 /* 567 * If further weak bindings are being (temporarily) suppressed then 568 * we'll settle for disabling kernel preemption (which assures 569 * no migration provided the thread does not block which it is 570 * not allowed to if using thread_nomigrate). We must remember 571 * this disposition so we can take appropriate action in 572 * thread_allowmigrate. If this is a nested call and the 573 * thread is already weakbound then fall through as normal. 574 * We remember the decision to settle for kpreempt_disable through 575 * negative nesting counting in t_nomigrate. Once a thread has had one 576 * weakbinding request satisfied in this way any further (nested) 577 * requests will continue to be satisfied in the same way, 578 * even if weak bindings have recommenced. 579 */ 580 if (t->t_nomigrate < 0 || weakbindingbarrier && t->t_nomigrate == 0) { 581 --t->t_nomigrate; 582 thread_unlock(curthread); 583 return; /* with kpreempt_disable still active */ 584 } 585 586 /* 587 * We hold thread_lock so t_bind_cpu cannot change. We could, 588 * however, be running on a different cpu to which we are t_bound_cpu 589 * to (as explained above). If we grant the weak binding request 590 * in that case then the dispatcher must favour our weak binding 591 * over our strong (in which case, just as when preemption is 592 * disabled, we can continue to run on a cpu other than the one to 593 * which we are strongbound; the difference in this case is that 594 * this thread can be preempted and so can appear on the dispatch 595 * queues of a cpu other than the one it is strongbound to). 
596 * 597 * If the cpu we are running on does not appear to be a current 598 * offline target (we check cpu_inmotion to determine this - since 599 * we don't hold cpu_lock we may not see a recent store to that, 600 * so it's possible that we at times can grant a weak binding to a 601 * cpu that is an offline target, but that one request will not 602 * prevent the offline from succeeding) then we will always grant 603 * the weak binding request. This includes the case above where 604 * we grant a weakbinding not commensurate with our strong binding. 605 * 606 * If our cpu does appear to be an offline target then we're inclined 607 * not to grant the weakbinding request just yet - we'd prefer to 608 * migrate to another cpu and grant the request there. The 609 * exceptions are those cases where going through preemption code 610 * will not result in us changing cpu: 611 * 612 * . interrupts have already bypassed this case (see above) 613 * . we are already weakbound to this cpu (dispatcher code will 614 * always return us to the weakbound cpu) 615 * . preemption was disabled even before we disabled it above 616 * . we are strongbound to this cpu (if we're strongbound to 617 * another and not yet running there the trip through the 618 * dispatcher will move us to the strongbound cpu and we 619 * will grant the weak binding there) 620 */ 621 if (cp != cpu_inmotion || t->t_nomigrate > 0 || t->t_preempt > 1 || 622 t->t_bound_cpu == cp) { 623 /* 624 * Don't be tempted to store to t_weakbound_cpu only on 625 * the first nested bind request - if we're interrupted 626 * after the increment of t_nomigrate and before the 627 * store to t_weakbound_cpu and the interrupt calls 628 * thread_nomigrate then the assertion in thread_allowmigrate 629 * would fail. 
630 */ 631 t->t_nomigrate++; 632 t->t_weakbound_cpu = cp; 633 membar_producer(); 634 thread_unlock(curthread); 635 /* 636 * Now that we have dropped the thread_lock another thread 637 * can set our t_weakbound_cpu, and will try to migrate us 638 * to the strongbound cpu (which will not be prevented by 639 * preemption being disabled since we're about to enable 640 * preemption). We have granted the weakbinding to the current 641 * cpu, so again we are in the position that is is is possible 642 * that our weak and strong bindings differ. Again this 643 * is catered for by dispatcher code which will favour our 644 * weak binding. 645 */ 646 kpreempt_enable(); 647 } else { 648 /* 649 * Move to another cpu before granting the request by 650 * forcing this thread through preemption code. When we 651 * get to set{front,back}dq called from CL_PREEMPT() 652 * cpu_choose() will be used to select a cpu to queue 653 * us on - that will see cpu_inmotion and take 654 * steps to avoid returning us to this cpu. 655 */ 656 cp->cpu_kprunrun = 1; 657 thread_unlock(curthread); 658 kpreempt_enable(); /* will call preempt() */ 659 goto again; 660 } 661 } 662 663 void 664 thread_allowmigrate(void) 665 { 666 kthread_id_t t = curthread; 667 668 ASSERT(t->t_weakbound_cpu == CPU || 669 (t->t_nomigrate < 0 && t->t_preempt > 0) || 670 CPU_ON_INTR(CPU) || t->t_flag & T_INTR_THREAD || 671 getpil() >= DISP_LEVEL); 672 673 if (CPU_ON_INTR(CPU) || (t->t_flag & T_INTR_THREAD) || 674 getpil() >= DISP_LEVEL) 675 return; 676 677 if (t->t_nomigrate < 0) { 678 /* 679 * This thread was granted "weak binding" in the 680 * stronger form of kernel preemption disabling. 681 * Undo a level of nesting for both t_nomigrate 682 * and t_preempt. 683 */ 684 ++t->t_nomigrate; 685 kpreempt_enable(); 686 } else if (--t->t_nomigrate == 0) { 687 /* 688 * Time to drop the weak binding. 
We need to cater 689 * for the case where we're weakbound to a different 690 * cpu than that to which we're strongbound (a very 691 * temporary arrangement that must only persist until 692 * weak binding drops). We don't acquire thread_lock 693 * here so even as this code executes t_bound_cpu 694 * may be changing. So we disable preemption and 695 * a) in the case that t_bound_cpu changes while we 696 * have preemption disabled kprunrun will be set 697 * asynchronously, and b) if before disabling 698 * preemption we were already on a different cpu to 699 * our t_bound_cpu then we set kprunrun ourselves 700 * to force a trip through the dispatcher when 701 * preemption is enabled. 702 */ 703 kpreempt_disable(); 704 if (t->t_bound_cpu && 705 t->t_weakbound_cpu != t->t_bound_cpu) 706 CPU->cpu_kprunrun = 1; 707 t->t_weakbound_cpu = NULL; 708 membar_producer(); 709 kpreempt_enable(); 710 } 711 } 712 713 /* 714 * weakbinding_stop can be used to temporarily cause weakbindings made 715 * with thread_nomigrate to be satisfied through the stronger action of 716 * kpreempt_disable. weakbinding_start recommences normal weakbinding. 717 */ 718 719 void 720 weakbinding_stop(void) 721 { 722 ASSERT(MUTEX_HELD(&cpu_lock)); 723 weakbindingbarrier = 1; 724 membar_producer(); /* make visible before subsequent thread_lock */ 725 } 726 727 void 728 weakbinding_start(void) 729 { 730 ASSERT(MUTEX_HELD(&cpu_lock)); 731 weakbindingbarrier = 0; 732 } 733 734 void 735 null_xcall(void) 736 { 737 } 738 739 /* 740 * This routine is called to place the CPUs in a safe place so that 741 * one of them can be taken off line or placed on line. What we are 742 * trying to do here is prevent a thread from traversing the list 743 * of active CPUs while we are changing it or from getting placed on 744 * the run queue of a CPU that has just gone off line. We do this by 745 * creating a thread with the highest possible prio for each CPU and 746 * having it call this routine. 
The advantage of this method is that 747 * we can eliminate all checks for CPU_ACTIVE in the disp routines. 748 * This makes disp faster at the expense of making p_online() slower 749 * which is a good trade off. 750 */ 751 static void 752 cpu_pause(int index) 753 { 754 int s; 755 struct _cpu_pause_info *cpi = &cpu_pause_info; 756 volatile char *safe = &safe_list[index]; 757 long lindex = index; 758 759 ASSERT((curthread->t_bound_cpu != NULL) || (*safe == PAUSE_DIE)); 760 761 while (*safe != PAUSE_DIE) { 762 *safe = PAUSE_READY; 763 membar_enter(); /* make sure stores are flushed */ 764 sema_v(&cpi->cp_sem); /* signal requesting thread */ 765 766 /* 767 * Wait here until all pause threads are running. That 768 * indicates that it's safe to do the spl. Until 769 * cpu_pause_info.cp_go is set, we don't want to spl 770 * because that might block clock interrupts needed 771 * to preempt threads on other CPUs. 772 */ 773 while (cpi->cp_go == 0) 774 ; 775 /* 776 * Even though we are at the highest disp prio, we need 777 * to block out all interrupts below LOCK_LEVEL so that 778 * an intr doesn't come in, wake up a thread, and call 779 * setbackdq/setfrontdq. 780 */ 781 s = splhigh(); 782 /* 783 * if cpu_pause_func() has been set then call it using 784 * index as the argument, currently only used by 785 * cpr_suspend_cpus(). This function is used as the 786 * code to execute on the "paused" cpu's when a machine 787 * comes out of a sleep state and CPU's were powered off. 788 * (could also be used for hotplugging CPU's). 789 */ 790 if (cpu_pause_func != NULL) 791 (*cpu_pause_func)((void *)lindex); 792 793 mach_cpu_pause(safe); 794 795 splx(s); 796 /* 797 * Waiting is at an end. Switch out of cpu_pause 798 * loop and resume useful work. 799 */ 800 swtch(); 801 } 802 803 mutex_enter(&pause_free_mutex); 804 *safe = PAUSE_DEAD; 805 cv_broadcast(&pause_free_cv); 806 mutex_exit(&pause_free_mutex); 807 } 808 809 /* 810 * Allow the cpus to start running again. 
811 */ 812 void 813 start_cpus() 814 { 815 int i; 816 817 ASSERT(MUTEX_HELD(&cpu_lock)); 818 ASSERT(cpu_pause_info.cp_paused); 819 cpu_pause_info.cp_paused = NULL; 820 for (i = 0; i < NCPU; i++) 821 safe_list[i] = PAUSE_IDLE; 822 membar_enter(); /* make sure stores are flushed */ 823 affinity_clear(); 824 splx(cpu_pause_info.cp_spl); 825 kpreempt_enable(); 826 } 827 828 /* 829 * Allocate a pause thread for a CPU. 830 */ 831 static void 832 cpu_pause_alloc(cpu_t *cp) 833 { 834 kthread_id_t t; 835 long cpun = cp->cpu_id; 836 837 /* 838 * Note, v.v_nglobpris will not change value as long as I hold 839 * cpu_lock. 840 */ 841 t = thread_create(NULL, 0, cpu_pause, (void *)cpun, 842 0, &p0, TS_STOPPED, v.v_nglobpris - 1); 843 thread_lock(t); 844 t->t_bound_cpu = cp; 845 t->t_disp_queue = cp->cpu_disp; 846 t->t_affinitycnt = 1; 847 t->t_preempt = 1; 848 thread_unlock(t); 849 cp->cpu_pause_thread = t; 850 /* 851 * Registering a thread in the callback table is usually done 852 * in the initialization code of the thread. In this 853 * case, we do it right after thread creation because the 854 * thread itself may never run, and we need to register the 855 * fact that it is safe for cpr suspend. 856 */ 857 CALLB_CPR_INIT_SAFE(t, "cpu_pause"); 858 } 859 860 /* 861 * Free a pause thread for a CPU. 862 */ 863 static void 864 cpu_pause_free(cpu_t *cp) 865 { 866 kthread_id_t t; 867 int cpun = cp->cpu_id; 868 869 ASSERT(MUTEX_HELD(&cpu_lock)); 870 /* 871 * We have to get the thread and tell him to die. 872 */ 873 if ((t = cp->cpu_pause_thread) == NULL) { 874 ASSERT(safe_list[cpun] == PAUSE_IDLE); 875 return; 876 } 877 thread_lock(t); 878 t->t_cpu = CPU; /* disp gets upset if last cpu is quiesced. */ 879 t->t_bound_cpu = NULL; /* Must un-bind; cpu may not be running. 
*/
	t->t_pri = v.v_nglobpris - 1;
	ASSERT(safe_list[cpun] == PAUSE_IDLE);
	safe_list[cpun] = PAUSE_DIE;
	THREAD_TRANSITION(t);
	setbackdq(t);
	thread_unlock_nopreempt(t);

	/*
	 * If we don't wait for the thread to actually die, it may try to
	 * run on the wrong cpu as part of an actual call to pause_cpus().
	 */
	mutex_enter(&pause_free_mutex);
	while (safe_list[cpun] != PAUSE_DEAD) {
		cv_wait(&pause_free_cv, &pause_free_mutex);
	}
	mutex_exit(&pause_free_mutex);
	safe_list[cpun] = PAUSE_IDLE;

	cp->cpu_pause_thread = NULL;
}

/*
 * Initialize basic structures for pausing CPUs.
 *
 * Sets up the semaphore in cpu_pause_info and allocates the pause
 * thread for the CPU this code is currently running on.
 */
void
cpu_pause_init()
{
	sema_init(&cpu_pause_info.cp_sem, 0, NULL, SEMA_DEFAULT, NULL);
	/*
	 * Create initial CPU pause thread.
	 */
	cpu_pause_alloc(CPU);
}

/*
 * Start the threads used to pause another CPU.
 *
 * cpu_id is the CPU that must NOT be paused (the one the caller will
 * keep running on).  Every slot of safe_list[] that will not get a
 * pause thread started (CPU not available, the excluded CPU, or a CPU
 * that is quiesced / not ready) is set to PAUSE_WAIT right away, so
 * that the caller's "wait for PAUSE_WAIT on every index" loop does not
 * block on it.
 *
 * Returns the number of pause threads that were made runnable.
 */
static int
cpu_pause_start(processorid_t cpu_id)
{
	int	i;
	int	cpu_count = 0;

	for (i = 0; i < NCPU; i++) {
		cpu_t		*cp;
		kthread_id_t	t;

		cp = cpu[i];
		if (!CPU_IN_SET(cpu_available, i) || (i == cpu_id)) {
			safe_list[i] = PAUSE_WAIT;
			continue;
		}

		/*
		 * Skip CPU if it is quiesced or not yet started.
		 */
		if ((cp->cpu_flags & (CPU_QUIESCED | CPU_READY)) != CPU_READY) {
			safe_list[i] = PAUSE_WAIT;
			continue;
		}

		/*
		 * Start this CPU's pause thread.
		 */
		t = cp->cpu_pause_thread;
		thread_lock(t);
		/*
		 * Reset the priority, since nglobpris may have
		 * changed since the thread was created, if someone
		 * has loaded the RT (or some other) scheduling
		 * class.
		 */
		t->t_pri = v.v_nglobpris - 1;
		THREAD_TRANSITION(t);
		setbackdq(t);
		thread_unlock_nopreempt(t);
		++cpu_count;
	}
	return (cpu_count);
}


/*
 * Pause all of the CPUs except the one we are on by creating a high
 * priority thread bound to those CPUs.
 *
 * Note that one must be extremely careful regarding code
 * executed while CPUs are paused.  Since a CPU may be paused
 * while a thread scheduling on that CPU is holding an adaptive
 * lock, code executed with CPUs paused must not acquire adaptive
 * (or low-level spin) locks.  Also, such code must not block,
 * since the thread that is supposed to initiate the wakeup may
 * never run.
 *
 * With a few exceptions, the restrictions on code executed with CPUs
 * paused match those for code executed at high-level interrupt
 * context.
 *
 * off_cp is the CPU that is about to be taken offline, or NULL when no
 * CPU is going away (the other callers in this file pass NULL).  It is
 * only used to pick which CPU the calling thread stays on.
 *
 * The caller must hold cpu_lock, and CPUs must not already be paused.
 * On return kernel preemption is disabled, the calling thread has had
 * affinity_set() applied, the previous spl is saved in
 * cpu_pause_info.cp_spl and cpu_pause_info.cp_paused is curthread.
 */
void
pause_cpus(cpu_t *off_cp)
{
	processorid_t	cpu_id;
	int		i;
	struct _cpu_pause_info	*cpi = &cpu_pause_info;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpi->cp_paused == NULL);
	cpi->cp_count = 0;
	cpi->cp_go = 0;
	for (i = 0; i < NCPU; i++)
		safe_list[i] = PAUSE_IDLE;
	kpreempt_disable();

	/*
	 * If running on the cpu that is going offline, get off it.
	 * This is so that it won't be necessary to rechoose a CPU
	 * when done.
	 */
	if (CPU == off_cp)
		cpu_id = off_cp->cpu_next_part->cpu_id;
	else
		cpu_id = CPU->cpu_id;
	affinity_set(cpu_id);

	/*
	 * Start the pause threads and record how many were started
	 */
	cpi->cp_count = cpu_pause_start(cpu_id);

	/*
	 * Now wait for all CPUs to be running the pause thread.
	 */
	while (cpi->cp_count > 0) {
		/*
		 * Spin reading the count without grabbing the disp
		 * lock to make sure we don't prevent the pause
		 * threads from getting the lock.
		 */
		while (sema_held(&cpi->cp_sem))
			;
		if (sema_tryp(&cpi->cp_sem))
			--cpi->cp_count;
	}
	cpi->cp_go = 1;			/* all have reached cpu_pause */

	/*
	 * Now wait for all CPUs to spl. (Transition from PAUSE_READY
	 * to PAUSE_WAIT.)
	 */
	for (i = 0; i < NCPU; i++) {
		while (safe_list[i] != PAUSE_WAIT)
			;
	}
	cpi->cp_spl = splhigh();	/* block dispatcher on this CPU */
	cpi->cp_paused = curthread;
}

/*
 * Check whether the current thread has CPUs paused
 *
 * Returns 1 if CPUs are currently paused (cp_paused is set), 0 otherwise.
 * If CPUs are paused, the pausing thread is asserted to be curthread.
 */
int
cpus_paused(void)
{
	if (cpu_pause_info.cp_paused != NULL) {
		ASSERT(cpu_pause_info.cp_paused == curthread);
		return (1);
	}
	return (0);
}

/*
 * Look up a CPU by processor id without any zone visibility filtering.
 *
 * Returns NULL if cpun is outside [0, NCPU) or is not in the
 * cpu_available set; otherwise returns cpu[cpun].  The caller must
 * hold cpu_lock.
 */
static cpu_t *
cpu_get_all(processorid_t cpun)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	if (cpun >= NCPU || cpun < 0 || !CPU_IN_SET(cpu_available, cpun))
		return (NULL);
	return (cpu[cpun]);
}

/*
 * Check whether cpun is a valid processor id and whether it should be
 * visible from the current zone.  If it is, return a pointer to the
 * associated CPU structure.
 *
 * A CPU is hidden (NULL is returned) when the caller is not in the
 * global zone, pools are enabled, and the zone's processor set differs
 * from the one the CPU belongs to.  The caller must hold cpu_lock.
 */
cpu_t *
cpu_get(processorid_t cpun)
{
	cpu_t *c;

	ASSERT(MUTEX_HELD(&cpu_lock));
	c = cpu_get_all(cpun);
	if (c != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
	    zone_pset_get(curproc->p_zone) != cpupart_query_cpu(c))
		return (NULL);
	return (c);
}

/*
 * The following functions should be used to check CPU states in the kernel.
 * They should be invoked with cpu_lock held.  Kernel subsystems interested
 * in CPU states should *not* use cpu_get_state() and various P_ONLINE/etc
 * states.  Those are for user-land (and system call) use only.
 */

/*
 * Determine whether the CPU is online and handling interrupts.
 */
int
cpu_is_online(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_online(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is offline (this includes spare and faulted).
*/
int
cpu_is_offline(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_offline(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is powered off.
 */
int
cpu_is_poweredoff(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_poweredoff(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is active but NOT handling interrupts
 * (i.e. CPU_ENABLE is clear; see cpu_flagged_nointr()).
 */
int
cpu_is_nointr(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_nointr(cpu->cpu_flags));
}

/*
 * Determine whether the CPU is active (scheduling threads).
 */
int
cpu_is_active(cpu_t *cpu)
{
	ASSERT(MUTEX_HELD(&cpu_lock));
	return (cpu_flagged_active(cpu->cpu_flags));
}

/*
 * Same as above, but these require cpu_flags instead of cpu_t pointers.
 * They only inspect the flag word passed in and do not assert cpu_lock.
 */

/*
 * Online: active and CPU_ENABLE is set.
 */
int
cpu_flagged_online(cpu_flag_t cpu_flags)
{
	return (cpu_flagged_active(cpu_flags) &&
	    (cpu_flags & CPU_ENABLE));
}

/*
 * Offline: not powered off, and not in the "CPU_READY set with
 * CPU_OFFLINE clear" state.
 */
int
cpu_flagged_offline(cpu_flag_t cpu_flags)
{
	return (((cpu_flags & CPU_POWEROFF) == 0) &&
	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY));
}

/*
 * Powered off: CPU_POWEROFF is set.
 */
int
cpu_flagged_poweredoff(cpu_flag_t cpu_flags)
{
	return ((cpu_flags & CPU_POWEROFF) == CPU_POWEROFF);
}

/*
 * No-intr: active and CPU_ENABLE is clear.
 */
int
cpu_flagged_nointr(cpu_flag_t cpu_flags)
{
	return (cpu_flagged_active(cpu_flags) &&
	    (cpu_flags & CPU_ENABLE) == 0);
}

/*
 * Active: none of CPU_POWEROFF, CPU_FAULTED or CPU_SPARE is set, and
 * CPU_READY is set while CPU_OFFLINE is clear.
 */
int
cpu_flagged_active(cpu_flag_t cpu_flags)
{
	return (((cpu_flags & (CPU_POWEROFF | CPU_FAULTED | CPU_SPARE)) == 0) &&
	    ((cpu_flags & (CPU_READY | CPU_OFFLINE)) == CPU_READY));
}

/*
 * Bring the indicated CPU online.
 *
 * The caller must hold cpu_lock.  Returns 0 on success, or the error
 * returned by the architecture hook mp_cpu_start(), in which case
 * nothing else has been changed.
 */
int
cpu_online(cpu_t *cp)
{
	int	error = 0;

	/*
	 * Handle on-line request.
	 *	This code must put the new CPU on the active list before
	 *	starting it because it will not be paused, and will start
	 * 	using the active list immediately.  The real start occurs
	 *	when the CPU_QUIESCED flag is turned off.
	 */

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Put all the cpus into a known safe place.
	 * No mutexes can be entered while CPUs are paused.
	 */
	error = mp_cpu_start(cp);	/* arch-dep hook */
	if (error == 0) {
		pg_cpupart_in(cp, cp->cpu_part);
		pause_cpus(NULL);
		cpu_add_active_internal(cp);
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		}
		cp->cpu_flags &= ~(CPU_QUIESCED | CPU_OFFLINE | CPU_FROZEN |
		    CPU_SPARE);
		start_cpus();
		/* CPUs are running again; the calls below happen unpaused. */
		cpu_stats_kstat_create(cp);
		cpu_create_intrstat(cp);
		lgrp_kstat_create(cp);
		cpu_state_change_notify(cp->cpu_id, CPU_ON);
		cpu_intr_enable(cp);	/* arch-dep hook */
		cpu_set_state(cp);
		cyclic_online(cp);
		/*
		 * This has to be called only after cyclic_online(). This
		 * function uses cyclics.
		 */
		callout_cpu_online(cp);
		poke_cpu(cp->cpu_id);
	}

	return (error);
}

/*
 * Take the indicated CPU offline.
 *
 * flags: if CPU_FORCED is set, hard-bound threads are unbound as well
 * as soft-bound ones (see the cpu_unbind() call below).
 *
 * The caller must hold cpu_lock.  Returns 0 on success.  Returns EBUSY
 * when the CPU cannot be taken offline (last CPU, the calling thread
 * is bound to it, threads remain bound, or cyclics could not be moved),
 * or whatever error cpu_unbind() / mp_cpu_stop() report.
 *
 * A CPU that is currently faulted or spare is simply moved to the plain
 * offline state by clearing those flags.
 */
int
cpu_offline(cpu_t *cp, int flags)
{
	cpupart_t	*pp;
	int		error = 0;
	cpu_t		*ncp;
	int		intr_enable;
	int		cyclic_off = 0;
	int		callout_off = 0;
	int		loop_count;
	int		no_quiesce = 0;
	int		(*bound_func)(struct cpu *, int);
	kthread_t	*t;
	lpl_t		*cpu_lpl;
	proc_t		*p;
	int		lgrp_diff_lpl;
	boolean_t	unbind_all_threads = (flags & CPU_FORCED) != 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * If we're going from faulted or spare to offline, just
	 * clear these flags and update CPU state.
	 */
	if (cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) {
		if (cp->cpu_flags & CPU_FAULTED) {
			cp->cpu_flags &= ~CPU_FAULTED;
			mp_cpu_faulted_exit(cp);
		}
		cp->cpu_flags &= ~CPU_SPARE;
		cpu_set_state(cp);
		return (0);
	}

	/*
	 * Handle off-line request.
	 */
	pp = cp->cpu_part;
	/*
	 * Don't offline last online CPU in partition
	 *
	 * NOTE(review): the cpu_intr_count(cp) < 2 term presumably means
	 * "this is the last CPU able to take interrupts" -- confirm
	 * against cpu_intr_count().
	 */
	if (ncpus_online <= 1 || pp->cp_ncpus <= 1 || cpu_intr_count(cp) < 2)
		return (EBUSY);
	/*
	 * Unbind all soft-bound threads bound to our CPU and hard bound threads
	 * if we were asked to.
	 */
	error = cpu_unbind(cp->cpu_id, unbind_all_threads);
	if (error != 0)
		return (error);
	/*
	 * We shouldn't be bound to this CPU ourselves.
	 */
	if (curthread->t_bound_cpu == cp)
		return (EBUSY);

	/*
	 * Tell interested parties that this CPU is going offline.
	 */
	cpu_state_change_notify(cp->cpu_id, CPU_OFF);

	/*
	 * Tell the PG subsystem that the CPU is leaving the partition
	 */
	pg_cpupart_out(cp, pp);

	/*
	 * Take the CPU out of interrupt participation so we won't find
	 * bound kernel threads.  If the architecture cannot completely
	 * shut off interrupts on the CPU, don't quiesce it, but don't
	 * run anything but interrupt thread... this is indicated by
	 * the CPU_OFFLINE flag being on but the CPU_QUIESCED flag being
	 * off.
	 */
	intr_enable = cp->cpu_flags & CPU_ENABLE;
	if (intr_enable)
		no_quiesce = cpu_intr_disable(cp);

	/*
	 * Record that we are aiming to offline this cpu.  This acts as
	 * a barrier to further weak binding requests in thread_nomigrate
	 * and also causes cpu_choose, disp_lowpri_cpu and setfrontdq to
	 * lean away from this cpu.  Further strong bindings are already
	 * avoided since we hold cpu_lock.  Since threads that are set
	 * runnable around now and others coming off the target cpu are
	 * directed away from the target, existing strong and weak bindings
	 * (especially the latter) to the target cpu stand maximum chance of
	 * being able to unbind during the short delay loop below (if other
	 * unbound threads compete they may not see cpu in time to unbind
	 * even if they would do so immediately).
	 */
	cpu_inmotion = cp;
	membar_enter();

	/*
	 * Check for kernel threads (strong or weak) bound to that CPU.
	 * Strongly bound threads may not unbind, and we'll have to return
	 * EBUSY.  Weakly bound threads should always disappear - we've
	 * stopped more weak binding with cpu_inmotion and existing
	 * bindings will drain imminently (they may not block).  Nonetheless
	 * we will wait for a fixed period for all bound threads to disappear.
	 * Inactive interrupt threads are OK (they'll be in TS_FREE
	 * state).  If test finds some bound threads, wait a few ticks
	 * to give short-lived threads (such as interrupts) chance to
	 * complete.  Note that if no_quiesce is set, i.e. this cpu
	 * is required to service interrupts, then we take the route
	 * that permits interrupt threads to be active (or bypassed).
	 */
	bound_func = no_quiesce ? disp_bound_threads : disp_bound_anythreads;

	/*
	 * "again" is re-entered from below when, with CPUs paused, we find
	 * that interrupts or bound threads are still present on the CPU.
	 * The callout_off / cyclic_off flags keep those steps from being
	 * repeated on a second pass.
	 */
again:	for (loop_count = 0; (*bound_func)(cp, 0); loop_count++) {
		if (loop_count >= 5) {
			error = EBUSY;	/* some threads still bound */
			break;
		}

		/*
		 * If some threads were assigned, give them
		 * a chance to complete or move.
		 *
		 * This assumes that the clock_thread is not bound
		 * to any CPU, because the clock_thread is needed to
		 * do the delay(hz/100).
		 *
		 * Note: we still hold the cpu_lock while waiting for
		 * the next clock tick.  This is OK since it isn't
		 * needed for anything else except processor_bind(2),
		 * and system initialization.  If we drop the lock,
		 * we would risk another p_online disabling the last
		 * processor.
		 */
		delay(hz/100);
	}

	if (error == 0 && callout_off == 0) {
		callout_cpu_offline(cp);
		callout_off = 1;
	}

	if (error == 0 && cyclic_off == 0) {
		if (!cyclic_offline(cp)) {
			/*
			 * We must have bound cyclics...
			 */
			error = EBUSY;
			goto out;
		}
		cyclic_off = 1;
	}

	/*
	 * Call mp_cpu_stop() to perform any special operations
	 * needed for this machine architecture to offline a CPU.
	 */
	if (error == 0)
		error = mp_cpu_stop(cp);	/* arch-dep hook */

	/*
	 * If that all worked, take the CPU offline and decrement
	 * ncpus_online.
	 */
	if (error == 0) {
		/*
		 * Put all the cpus into a known safe place.
		 * No mutexes can be entered while CPUs are paused.
		 */
		pause_cpus(cp);
		/*
		 * Repeat the operation, if necessary, to make sure that
		 * all outstanding low-level interrupts run to completion
		 * before we set the CPU_QUIESCED flag.  It's also possible
		 * that a thread has weak bound to the cpu despite our raising
		 * cpu_inmotion above since it may have loaded that
		 * value before the barrier became visible (this would have
		 * to be the thread that was on the target cpu at the time
		 * we raised the barrier).
		 */
		if ((!no_quiesce && cp->cpu_intr_actv != 0) ||
		    (*bound_func)(cp, 1)) {
			start_cpus();
			(void) mp_cpu_start(cp);
			goto again;
		}
		ncp = cp->cpu_next_part;
		cpu_lpl = cp->cpu_lpl;
		ASSERT(cpu_lpl != NULL);

		/*
		 * Remove the CPU from the list of active CPUs.
		 */
		cpu_remove_active(cp);

		/*
		 * Walk the active process list and look for threads
		 * whose home lgroup needs to be updated, or
		 * the last CPU they run on is the one being offlined now.
		 */

		ASSERT(curthread->t_cpu != cp);
		for (p = practive; p != NULL; p = p->p_next) {

			t = p->p_tlist;

			if (t == NULL)
				continue;

			lgrp_diff_lpl = 0;

			do {
				ASSERT(t->t_lpl != NULL);
				/*
				 * Taking last CPU in lpl offline
				 * Rehome thread if it is in this lpl
				 * Otherwise, update the count of how many
				 * threads are in this CPU's lgroup but have
				 * a different lpl.
				 */

				if (cpu_lpl->lpl_ncpu == 0) {
					if (t->t_lpl == cpu_lpl)
						lgrp_move_thread(t,
						    lgrp_choose(t,
						    t->t_cpupart), 0);
					else if (t->t_lpl->lpl_lgrpid ==
					    cpu_lpl->lpl_lgrpid)
						lgrp_diff_lpl++;
				}
				ASSERT(t->t_lpl->lpl_ncpu > 0);

				/*
				 * Update CPU last ran on if it was this CPU
				 */
				if (t->t_cpu == cp && t->t_bound_cpu != cp)
					t->t_cpu = disp_lowpri_cpu(ncp,
					    t->t_lpl, t->t_pri, NULL);
				ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
				    t->t_weakbound_cpu == cp);

				t = t->t_forw;
			} while (t != p->p_tlist);

			/*
			 * Didn't find any threads in the same lgroup as this
			 * CPU with a different lpl, so remove the lgroup from
			 * the process lgroup bitmask.
			 */

			if (lgrp_diff_lpl == 0)
				klgrpset_del(p->p_lgrpset, cpu_lpl->lpl_lgrpid);
		}

		/*
		 * Walk thread list looking for threads that need to be
		 * rehomed, since there are some threads that are not in
		 * their process's p_tlist.
		 */

		t = curthread;
		do {
			ASSERT(t != NULL && t->t_lpl != NULL);

			/*
			 * Rehome threads with same lpl as this CPU when this
			 * is the last CPU in the lpl.
			 */

			if ((cpu_lpl->lpl_ncpu == 0) && (t->t_lpl == cpu_lpl))
				lgrp_move_thread(t,
				    lgrp_choose(t, t->t_cpupart), 1);

			ASSERT(t->t_lpl->lpl_ncpu > 0);

			/*
			 * Update CPU last ran on if it was this CPU
			 */

			if (t->t_cpu == cp && t->t_bound_cpu != cp) {
				t->t_cpu = disp_lowpri_cpu(ncp,
				    t->t_lpl, t->t_pri, NULL);
			}
			ASSERT(t->t_cpu != cp || t->t_bound_cpu == cp ||
			    t->t_weakbound_cpu == cp);
			t = t->t_next;

		} while (t != curthread);
		ASSERT((cp->cpu_flags & (CPU_FAULTED | CPU_SPARE)) == 0);
		cp->cpu_flags |= CPU_OFFLINE;
		disp_cpu_inactive(cp);
		if (!no_quiesce)
			cp->cpu_flags |= CPU_QUIESCED;
		ncpus_online--;
		cpu_set_state(cp);
		cpu_inmotion = NULL;
		start_cpus();
		/* CPUs are running again; tear down per-CPU kstats unpaused. */
		cpu_stats_kstat_destroy(cp);
		cpu_delete_intrstat(cp);
		lgrp_kstat_destroy(cp);
	}

out:
	cpu_inmotion = NULL;

	/*
	 * If we failed, re-enable interrupts.
	 * Do this even if cpu_intr_disable returned an error, because
	 * it may have partially disabled interrupts.
	 */
	if (error && intr_enable)
		cpu_intr_enable(cp);

	/*
	 * If we failed, but managed to offline the cyclic subsystem on this
	 * CPU, bring it back online.
	 */
	if (error && cyclic_off)
		cyclic_online(cp);

	/*
	 * If we failed, but managed to offline callouts on this CPU,
	 * bring it back online.
	 */
	if (error && callout_off)
		callout_cpu_online(cp);

	/*
	 * If we failed, tell the PG subsystem that the CPU is back
	 *
	 * NOTE(review): unlike the other recovery steps here, this call is
	 * not guarded by "error", so it also runs after a successful
	 * offline.  Confirm whether that is intended before changing it.
	 */
	pg_cpupart_in(cp, pp);

	/*
	 * If we failed, we need to notify everyone that this CPU is back on.
	 */
	if (error != 0)
		cpu_state_change_notify(cp->cpu_id, CPU_ON);

	return (error);
}

/*
 * Mark the indicated CPU as faulted, taking it offline.
1567 */ 1568 int 1569 cpu_faulted(cpu_t *cp, int flags) 1570 { 1571 int error = 0; 1572 1573 ASSERT(MUTEX_HELD(&cpu_lock)); 1574 ASSERT(!cpu_is_poweredoff(cp)); 1575 1576 if (cpu_is_offline(cp)) { 1577 cp->cpu_flags &= ~CPU_SPARE; 1578 cp->cpu_flags |= CPU_FAULTED; 1579 mp_cpu_faulted_enter(cp); 1580 cpu_set_state(cp); 1581 return (0); 1582 } 1583 1584 if ((error = cpu_offline(cp, flags)) == 0) { 1585 cp->cpu_flags |= CPU_FAULTED; 1586 mp_cpu_faulted_enter(cp); 1587 cpu_set_state(cp); 1588 } 1589 1590 return (error); 1591 } 1592 1593 /* 1594 * Mark the indicated CPU as a spare, taking it offline. 1595 */ 1596 int 1597 cpu_spare(cpu_t *cp, int flags) 1598 { 1599 int error = 0; 1600 1601 ASSERT(MUTEX_HELD(&cpu_lock)); 1602 ASSERT(!cpu_is_poweredoff(cp)); 1603 1604 if (cpu_is_offline(cp)) { 1605 if (cp->cpu_flags & CPU_FAULTED) { 1606 cp->cpu_flags &= ~CPU_FAULTED; 1607 mp_cpu_faulted_exit(cp); 1608 } 1609 cp->cpu_flags |= CPU_SPARE; 1610 cpu_set_state(cp); 1611 return (0); 1612 } 1613 1614 if ((error = cpu_offline(cp, flags)) == 0) { 1615 cp->cpu_flags |= CPU_SPARE; 1616 cpu_set_state(cp); 1617 } 1618 1619 return (error); 1620 } 1621 1622 /* 1623 * Take the indicated CPU from poweroff to offline. 1624 */ 1625 int 1626 cpu_poweron(cpu_t *cp) 1627 { 1628 int error = ENOTSUP; 1629 1630 ASSERT(MUTEX_HELD(&cpu_lock)); 1631 ASSERT(cpu_is_poweredoff(cp)); 1632 1633 error = mp_cpu_poweron(cp); /* arch-dep hook */ 1634 if (error == 0) 1635 cpu_set_state(cp); 1636 1637 return (error); 1638 } 1639 1640 /* 1641 * Take the indicated CPU from any inactive state to powered off. 
*/
/*
 * The caller must hold cpu_lock and the CPU must already be offline.
 * Returns EBUSY if the CPU is not quiesced, otherwise the result of the
 * architecture hook mp_cpu_poweroff(); the CPU state is refreshed only
 * when the hook succeeds.
 */
int
cpu_poweroff(cpu_t *cp)
{
	int	error = ENOTSUP;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_is_offline(cp));

	if (!(cp->cpu_flags & CPU_QUIESCED))
		return (EBUSY);		/* not completely idle */

	error = mp_cpu_poweroff(cp);	/* arch-dep hook */
	if (error == 0)
		cpu_set_state(cp);

	return (error);
}

/*
 * Initialize the Sequential CPU id lookup table
 *
 * Allocates a zeroed table of max_ncpus cpu pointers, records the
 * current CPU in slot 0 and publishes the table through cpu_seq,
 * replacing the bootstrap value set up in cpu_list_init().
 */
void
cpu_seq_tbl_init()
{
	cpu_t	**tbl;

	tbl = kmem_zalloc(sizeof (struct cpu *) * max_ncpus, KM_SLEEP);
	tbl[0] = CPU;

	cpu_seq = tbl;
}

/*
 * Initialize the CPU lists for the first CPU.
 *
 * cp becomes the sole member of the global cpu list, the online list
 * and the default partition's list, is given sequential id 0, and is
 * added to cpu_available.
 */
void
cpu_list_init(cpu_t *cp)
{
	cp->cpu_next = cp;
	cp->cpu_prev = cp;
	cpu_list = cp;
	clock_cpu_list = cp;

	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_active = cp;

	cp->cpu_seqid = 0;
	CPUSET_ADD(cpu_seqid_inuse, 0);

	/*
	 * Bootstrap cpu_seq using cpu_list
	 * The cpu_seq[] table will be dynamically allocated
	 * when kmem later becomes available (but before going MP)
	 */
	cpu_seq = &cpu_list;

	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cp_default.cp_cpulist = cp;
	cp_default.cp_ncpus = 1;
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;
	cp->cpu_part = &cp_default;

	CPUSET_ADD(cpu_available, cp->cpu_id);
}

/*
 * Insert a CPU into the list of available CPUs.
 *
 * The caller must hold cpu_lock, and cpu_list_init() must already have
 * run.  The CPU is linked into the global cpu list (with other CPUs
 * paused), assigned the lowest free sequential id, registered in cpu[],
 * cpu_available and cpu_seq[], given a pause thread and an info kstat,
 * and left as a singleton on the online and partition lists.
 */
void
cpu_add_unit(cpu_t *cp)
{
	int seqid;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)cp, 0);

	/*
	 * Note: most users of the cpu_list will grab the
	 * cpu_lock to ensure that it isn't modified.  However,
	 * certain users can't or won't do that.  To allow this
	 * we pause the other cpus.  Users who walk the list
	 * without cpu_lock, must disable kernel preemption
	 * to ensure that the list isn't modified underneath
	 * them.  Also, any cached pointers to cpu structures
	 * must be revalidated by checking to see if the
	 * cpu_next pointer points to itself.  This check must
	 * be done with the cpu_lock held or kernel preemption
	 * disabled.  This check relies upon the fact that
	 * old cpu structures are not freed or cleared after
	 * they are removed from the cpu_list.
	 *
	 * NOTE(review): cpu_del_unit() in this file marks a deleted cpu
	 * by setting cpu_next (and cpu_prev) to NULL, and its comment
	 * says to check cp->cpu_next != NULL, so the "points to itself"
	 * wording above looks stale -- confirm.
	 *
	 * Note that the clock code walks the cpu list dereferencing
	 * the cpu_part pointer, so we need to initialize it before
	 * adding the cpu to the list.
	 */
	cp->cpu_part = &cp_default;
	(void) pause_cpus(NULL);
	cp->cpu_next = cpu_list;
	cp->cpu_prev = cpu_list->cpu_prev;
	cpu_list->cpu_prev->cpu_next = cp;
	cpu_list->cpu_prev = cp;
	start_cpus();

	/* Pick the lowest sequential id that is not already in use. */
	for (seqid = 0; CPU_IN_SET(cpu_seqid_inuse, seqid); seqid++)
		continue;
	CPUSET_ADD(cpu_seqid_inuse, seqid);
	cp->cpu_seqid = seqid;
	ASSERT(ncpus < max_ncpus);
	ncpus++;
	cp->cpu_cache_offset = KMEM_CPU_CACHE_OFFSET(cp->cpu_seqid);
	cpu[cp->cpu_id] = cp;
	CPUSET_ADD(cpu_available, cp->cpu_id);
	cpu_seq[cp->cpu_seqid] = cp;

	/*
	 * allocate a pause thread for this CPU.
	 */
	cpu_pause_alloc(cp);

	/*
	 * So that new CPUs won't have NULL prev_onln and next_onln pointers,
	 * link them into a list of just that CPU.
	 * This is so that disp_lowpri_cpu will work for thread_create in
	 * pause_cpus() when called from the startup thread in a new CPU.
	 */
	cp->cpu_next_onln = cp;
	cp->cpu_prev_onln = cp;
	cpu_info_kstat_create(cp);
	cp->cpu_next_part = cp;
	cp->cpu_prev_part = cp;

	init_cpu_mstate(cp, CMS_SYSTEM);

	pool_pset_mod = gethrtime();
}

/*
 * Do the opposite of cpu_add_unit().
 *
 * The caller must hold cpu_lock.  cpu[cpuid] must exist and must
 * already be a singleton on both the online and the partition lists
 * (asserted below).  The cpu structure itself is not freed here; its
 * cpu_next / cpu_prev are set to NULL to mark it deleted.
 */
void
cpu_del_unit(int cpuid)
{
	struct cpu	*cp, *cpnext;

	ASSERT(MUTEX_HELD(&cpu_lock));
	cp = cpu[cpuid];
	ASSERT(cp != NULL);

	ASSERT(cp->cpu_next_onln == cp);
	ASSERT(cp->cpu_prev_onln == cp);
	ASSERT(cp->cpu_next_part == cp);
	ASSERT(cp->cpu_prev_part == cp);

	/*
	 * Tear down the CPU's physical ID cache, and update any
	 * processor groups
	 */
	pg_cpu_fini(cp);
	pghw_physid_destroy(cp);

	/*
	 * Destroy kstat stuff.
	 */
	cpu_info_kstat_destroy(cp);
	term_cpu_mstate(cp);
	/*
	 * Free up pause thread.
	 */
	cpu_pause_free(cp);
	CPUSET_DEL(cpu_available, cp->cpu_id);
	cpu[cp->cpu_id] = NULL;
	cpu_seq[cp->cpu_seqid] = NULL;

	/*
	 * The clock thread and mutex_vector_enter cannot hold the
	 * cpu_lock while traversing the cpu list, therefore we pause
	 * all other threads by pausing the other cpus. These, and any
	 * other routines holding cpu pointers while possibly sleeping
	 * must be sure to call kpreempt_disable before processing the
	 * list and be sure to check that the cpu has not been deleted
	 * after any sleeps (check cp->cpu_next != NULL). We guarantee
	 * to keep the deleted cpu structure around.
	 *
	 * Note that this MUST be done AFTER cpu_available
	 * has been updated so that we don't waste time
	 * trying to pause the cpu we're trying to delete.
	 */
	(void) pause_cpus(NULL);

	cpnext = cp->cpu_next;
	cp->cpu_prev->cpu_next = cp->cpu_next;
	cp->cpu_next->cpu_prev = cp->cpu_prev;
	if (cp == cpu_list)
		cpu_list = cpnext;

	/*
	 * Signals that the cpu has been deleted (see above).
	 */
	cp->cpu_next = NULL;
	cp->cpu_prev = NULL;

	start_cpus();

	CPUSET_DEL(cpu_seqid_inuse, cp->cpu_seqid);
	ncpus--;
	lgrp_config(LGRP_CONFIG_CPU_DEL, (uintptr_t)cp, 0);

	pool_pset_mod = gethrtime();
}

/*
 * Add a CPU to the list of active CPUs.
 *	This routine must not get any locks, because other CPUs are paused.
 *
 * Links cp onto the tail of the online list and of its partition's cpu
 * list, bumps ncpus_online and the partition's cp_ncpus (and
 * cp_numparts_nonempty when the partition was empty), notifies the PG
 * and lgroup code, and zeroes the CPU's load average.
 */
static void
cpu_add_active_internal(cpu_t *cp)
{
	cpupart_t	*pp = cp->cpu_part;

	ASSERT(MUTEX_HELD(&cpu_lock));
	ASSERT(cpu_list != NULL);	/* list started in cpu_list_init */

	ncpus_online++;
	cpu_set_state(cp);
	cp->cpu_next_onln = cpu_active;
	cp->cpu_prev_onln = cpu_active->cpu_prev_onln;
	cpu_active->cpu_prev_onln->cpu_next_onln = cp;
	cpu_active->cpu_prev_onln = cp;

	if (pp->cp_cpulist) {
		cp->cpu_next_part = pp->cp_cpulist;
		cp->cpu_prev_part = pp->cp_cpulist->cpu_prev_part;
		pp->cp_cpulist->cpu_prev_part->cpu_next_part = cp;
		pp->cp_cpulist->cpu_prev_part = cp;
	} else {
		ASSERT(pp->cp_ncpus == 0);
		pp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
	}
	pp->cp_ncpus++;
	if (pp->cp_ncpus == 1) {
		cp_numparts_nonempty++;
		ASSERT(cp_numparts_nonempty != 0);
	}

	pg_cpu_active(cp);
	lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)cp, 0);

	bzero(&cp->cpu_loadavg, sizeof (cp->cpu_loadavg));
}

/*
 * Add a CPU to the list of active CPUs.
 *	This is called from machine-dependent layers when a new CPU is started.
1900 */ 1901 void 1902 cpu_add_active(cpu_t *cp) 1903 { 1904 pg_cpupart_in(cp, cp->cpu_part); 1905 1906 pause_cpus(NULL); 1907 cpu_add_active_internal(cp); 1908 start_cpus(); 1909 1910 cpu_stats_kstat_create(cp); 1911 cpu_create_intrstat(cp); 1912 lgrp_kstat_create(cp); 1913 cpu_state_change_notify(cp->cpu_id, CPU_INIT); 1914 } 1915 1916 1917 /* 1918 * Remove a CPU from the list of active CPUs. 1919 * This routine must not get any locks, because other CPUs are paused. 1920 */ 1921 /* ARGSUSED */ 1922 static void 1923 cpu_remove_active(cpu_t *cp) 1924 { 1925 cpupart_t *pp = cp->cpu_part; 1926 1927 ASSERT(MUTEX_HELD(&cpu_lock)); 1928 ASSERT(cp->cpu_next_onln != cp); /* not the last one */ 1929 ASSERT(cp->cpu_prev_onln != cp); /* not the last one */ 1930 1931 pg_cpu_inactive(cp); 1932 1933 lgrp_config(LGRP_CONFIG_CPU_OFFLINE, (uintptr_t)cp, 0); 1934 1935 if (cp == clock_cpu_list) 1936 clock_cpu_list = cp->cpu_next_onln; 1937 1938 cp->cpu_prev_onln->cpu_next_onln = cp->cpu_next_onln; 1939 cp->cpu_next_onln->cpu_prev_onln = cp->cpu_prev_onln; 1940 if (cpu_active == cp) { 1941 cpu_active = cp->cpu_next_onln; 1942 } 1943 cp->cpu_next_onln = cp; 1944 cp->cpu_prev_onln = cp; 1945 1946 cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part; 1947 cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part; 1948 if (pp->cp_cpulist == cp) { 1949 pp->cp_cpulist = cp->cpu_next_part; 1950 ASSERT(pp->cp_cpulist != cp); 1951 } 1952 cp->cpu_next_part = cp; 1953 cp->cpu_prev_part = cp; 1954 pp->cp_ncpus--; 1955 if (pp->cp_ncpus == 0) { 1956 cp_numparts_nonempty--; 1957 ASSERT(cp_numparts_nonempty != 0); 1958 } 1959 } 1960 1961 /* 1962 * Routine used to setup a newly inserted CPU in preparation for starting 1963 * it running code. 1964 */ 1965 int 1966 cpu_configure(int cpuid) 1967 { 1968 int retval = 0; 1969 1970 ASSERT(MUTEX_HELD(&cpu_lock)); 1971 1972 /* 1973 * Some structures are statically allocated based upon 1974 * the maximum number of cpus the system supports. 
Do not 1975 * try to add anything beyond this limit. 1976 */ 1977 if (cpuid < 0 || cpuid >= NCPU) { 1978 return (EINVAL); 1979 } 1980 1981 if ((cpu[cpuid] != NULL) && (cpu[cpuid]->cpu_flags != 0)) { 1982 return (EALREADY); 1983 } 1984 1985 if ((retval = mp_cpu_configure(cpuid)) != 0) { 1986 return (retval); 1987 } 1988 1989 cpu[cpuid]->cpu_flags = CPU_QUIESCED | CPU_OFFLINE | CPU_POWEROFF; 1990 cpu_set_state(cpu[cpuid]); 1991 retval = cpu_state_change_hooks(cpuid, CPU_CONFIG, CPU_UNCONFIG); 1992 if (retval != 0) 1993 (void) mp_cpu_unconfigure(cpuid); 1994 1995 return (retval); 1996 } 1997 1998 /* 1999 * Routine used to cleanup a CPU that has been powered off. This will 2000 * destroy all per-cpu information related to this cpu. 2001 */ 2002 int 2003 cpu_unconfigure(int cpuid) 2004 { 2005 int error; 2006 2007 ASSERT(MUTEX_HELD(&cpu_lock)); 2008 2009 if (cpu[cpuid] == NULL) { 2010 return (ENODEV); 2011 } 2012 2013 if (cpu[cpuid]->cpu_flags == 0) { 2014 return (EALREADY); 2015 } 2016 2017 if ((cpu[cpuid]->cpu_flags & CPU_POWEROFF) == 0) { 2018 return (EBUSY); 2019 } 2020 2021 if (cpu[cpuid]->cpu_props != NULL) { 2022 (void) nvlist_free(cpu[cpuid]->cpu_props); 2023 cpu[cpuid]->cpu_props = NULL; 2024 } 2025 2026 error = cpu_state_change_hooks(cpuid, CPU_UNCONFIG, CPU_CONFIG); 2027 2028 if (error != 0) 2029 return (error); 2030 2031 return (mp_cpu_unconfigure(cpuid)); 2032 } 2033 2034 /* 2035 * Routines for registering and de-registering cpu_setup callback functions. 2036 * 2037 * Caller's context 2038 * These routines must not be called from a driver's attach(9E) or 2039 * detach(9E) entry point. 2040 * 2041 * NOTE: CPU callbacks should not block. They are called with cpu_lock held. 
2042 */ 2043 2044 /* 2045 * Ideally, these would be dynamically allocated and put into a linked 2046 * list; however that is not feasible because the registration routine 2047 * has to be available before the kmem allocator is working (in fact, 2048 * it is called by the kmem allocator init code). In any case, there 2049 * are quite a few extra entries for future users. 2050 */ 2051 #define NCPU_SETUPS 20 2052 2053 struct cpu_setup { 2054 cpu_setup_func_t *func; 2055 void *arg; 2056 } cpu_setups[NCPU_SETUPS]; 2057 2058 void 2059 register_cpu_setup_func(cpu_setup_func_t *func, void *arg) 2060 { 2061 int i; 2062 2063 ASSERT(MUTEX_HELD(&cpu_lock)); 2064 2065 for (i = 0; i < NCPU_SETUPS; i++) 2066 if (cpu_setups[i].func == NULL) 2067 break; 2068 if (i >= NCPU_SETUPS) 2069 cmn_err(CE_PANIC, "Ran out of cpu_setup callback entries"); 2070 2071 cpu_setups[i].func = func; 2072 cpu_setups[i].arg = arg; 2073 } 2074 2075 void 2076 unregister_cpu_setup_func(cpu_setup_func_t *func, void *arg) 2077 { 2078 int i; 2079 2080 ASSERT(MUTEX_HELD(&cpu_lock)); 2081 2082 for (i = 0; i < NCPU_SETUPS; i++) 2083 if ((cpu_setups[i].func == func) && 2084 (cpu_setups[i].arg == arg)) 2085 break; 2086 if (i >= NCPU_SETUPS) 2087 cmn_err(CE_PANIC, "Could not find cpu_setup callback to " 2088 "deregister"); 2089 2090 cpu_setups[i].func = NULL; 2091 cpu_setups[i].arg = 0; 2092 } 2093 2094 /* 2095 * Call any state change hooks for this CPU, ignore any errors. 2096 */ 2097 void 2098 cpu_state_change_notify(int id, cpu_setup_t what) 2099 { 2100 int i; 2101 2102 ASSERT(MUTEX_HELD(&cpu_lock)); 2103 2104 for (i = 0; i < NCPU_SETUPS; i++) { 2105 if (cpu_setups[i].func != NULL) { 2106 cpu_setups[i].func(what, id, cpu_setups[i].arg); 2107 } 2108 } 2109 } 2110 2111 /* 2112 * Call any state change hooks for this CPU, undo it if error found. 
*/
/*
 * Invokes each registered callback with "what" in table order.  At the
 * first non-zero return, the callbacks in the lower-numbered slots are
 * called with "undo" in reverse order (the failing callback itself is
 * not called with "undo"), and that non-zero value is returned.
 * Returns 0 if every callback returned 0.  cpu_lock must be held.
 */
static int
cpu_state_change_hooks(int id, cpu_setup_t what, cpu_setup_t undo)
{
	int i;
	int retval = 0;

	ASSERT(MUTEX_HELD(&cpu_lock));

	for (i = 0; i < NCPU_SETUPS; i++) {
		if (cpu_setups[i].func != NULL) {
			retval = cpu_setups[i].func(what, id,
			    cpu_setups[i].arg);
			if (retval) {
				/*
				 * Reuses i to walk back over the earlier
				 * slots; the break below then leaves the
				 * outer loop.
				 */
				for (i--; i >= 0; i--) {
					if (cpu_setups[i].func != NULL)
						cpu_setups[i].func(undo,
						    id, cpu_setups[i].arg);
				}
				break;
			}
		}
	}
	return (retval);
}

/*
 * Export information about this CPU via the kstat mechanism.
 *
 * cpu_info_template is a single static instance that
 * cpu_info_kstat_update() fills in from whichever CPU is being read.
 * The order of the initializers below must match the order of the
 * members, including the per-architecture #if sections.
 */
static struct {
	kstat_named_t ci_state;
	kstat_named_t ci_state_begin;
	kstat_named_t ci_cpu_type;
	kstat_named_t ci_fpu_type;
	kstat_named_t ci_clock_MHz;
	kstat_named_t ci_chip_id;
	kstat_named_t ci_implementation;
	kstat_named_t ci_brandstr;
	kstat_named_t ci_core_id;
	kstat_named_t ci_curr_clock_Hz;
	kstat_named_t ci_supp_freq_Hz;
#if defined(__sparcv9)
	kstat_named_t ci_device_ID;
	kstat_named_t ci_cpu_fru;
#endif
#if defined(__x86)
	kstat_named_t ci_vendorstr;
	kstat_named_t ci_family;
	kstat_named_t ci_model;
	kstat_named_t ci_step;
	kstat_named_t ci_clogid;
	kstat_named_t ci_pkg_core_id;
	kstat_named_t ci_ncpuperchip;
	kstat_named_t ci_ncoreperchip;
	kstat_named_t ci_max_cstates;
	kstat_named_t ci_curr_cstate;
#endif
} cpu_info_template = {
	{ "state",			KSTAT_DATA_CHAR },
	{ "state_begin",		KSTAT_DATA_LONG },
	{ "cpu_type",			KSTAT_DATA_CHAR },
	{ "fpu_type",			KSTAT_DATA_CHAR },
	{ "clock_MHz",			KSTAT_DATA_LONG },
	{ "chip_id",			KSTAT_DATA_LONG },
	{ "implementation",		KSTAT_DATA_STRING },
	{ "brand",			KSTAT_DATA_STRING },
	{ "core_id",			KSTAT_DATA_LONG },
	{ "current_clock_Hz",		KSTAT_DATA_UINT64 },
	{ "supported_frequencies_Hz",	KSTAT_DATA_STRING },
#if defined(__sparcv9)
	{ "device_ID",			KSTAT_DATA_UINT64 },
	{ "cpu_fru",			KSTAT_DATA_STRING },
#endif
#if defined(__x86)
	{ "vendor_id",			KSTAT_DATA_STRING },
	{ "family",			KSTAT_DATA_INT32 },
	{ "model",			KSTAT_DATA_INT32 },
	{ "stepping",			KSTAT_DATA_INT32 },
	{ "clog_id",			KSTAT_DATA_INT32 },
	{ "pkg_core_id",		KSTAT_DATA_LONG },
	{ "ncpu_per_chip",		KSTAT_DATA_INT32 },
	{ "ncore_per_chip",		KSTAT_DATA_INT32 },
	{ "supported_max_cstates",	KSTAT_DATA_INT32 },
	{ "current_cstate",		KSTAT_DATA_INT32 },
#endif
};

/*
 * NOTE(review): presumably serializes access to the shared
 * cpu_info_template above -- confirm where it is installed as a kstat
 * lock.
 */
static kmutex_t cpu_info_template_lock;

/*
 * kstat update callback for the "cpu_info" kstat.
 *
 * ksp->ks_private is the cpu_t being reported on.  Writes are rejected
 * with EACCES; on a read the shared cpu_info_template is refreshed from
 * that CPU and 0 is returned.
 */
static int
cpu_info_kstat_update(kstat_t *ksp, int rw)
{
	cpu_t	*cp = ksp->ks_private;
	const char *pi_state;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	/* Translate the numeric processor state into its string name. */
	switch (cp->cpu_type_info.pi_state) {
	case P_ONLINE:
		pi_state = PS_ONLINE;
		break;
	case P_POWEROFF:
		pi_state = PS_POWEROFF;
		break;
	case P_NOINTR:
		pi_state = PS_NOINTR;
		break;
	case P_FAULTED:
		pi_state = PS_FAULTED;
		break;
	case P_SPARE:
		pi_state = PS_SPARE;
		break;
	case P_OFFLINE:
		pi_state = PS_OFFLINE;
		break;
	default:
		pi_state = "unknown";
	}
	/*
	 * NOTE(review): unbounded strcpy into the fixed-size value.c
	 * buffer; assumes every PS_* string fits -- confirm.
	 */
	(void) strcpy(cpu_info_template.ci_state.value.c, pi_state);
	cpu_info_template.ci_state_begin.value.l = cp->cpu_state_begin;
	/*
	 * NOTE(review): the copies below are capped at 15 bytes,
	 * presumably to leave the last byte of value.c as a terminator
	 * -- confirm against the kstat_named_t definition.
	 */
	(void) strncpy(cpu_info_template.ci_cpu_type.value.c,
	    cp->cpu_type_info.pi_processor_type, 15);
	(void) strncpy(cpu_info_template.ci_fpu_type.value.c,
	    cp->cpu_type_info.pi_fputypes, 15);
	cpu_info_template.ci_clock_MHz.value.l = cp->cpu_type_info.pi_clock;
	cpu_info_template.ci_chip_id.value.l =
	    pg_plat_hw_instance_id(cp, PGHW_CHIP);
	kstat_named_setstr(&cpu_info_template.ci_implementation,
	    cp->cpu_idstr);
	kstat_named_setstr(&cpu_info_template.ci_brandstr, cp->cpu_brandstr);
	cpu_info_template.ci_core_id.value.l = pg_plat_get_core_id(cp);
	cpu_info_template.ci_curr_clock_Hz.value.ui64 =
	    cp->cpu_curr_clock;
	kstat_named_setstr(&cpu_info_template.ci_supp_freq_Hz,
	    cp->cpu_supp_freqs);
#if defined(__sparcv9)
	cpu_info_template.ci_device_ID.value.ui64 =
	    cpunodes[cp->cpu_id].device_id;
	kstat_named_setstr(&cpu_info_template.ci_cpu_fru, cpu_fru_fmri(cp));
#endif
#if defined(__x86)
	/*
	 * NOTE(review): family, model, stepping, clog_id, ncpu_per_chip,
	 * ncore_per_chip and the two cstate fields are declared
	 * KSTAT_DATA_INT32 in the template but are stored through
	 * value.l here -- confirm this is intended.
	 */
	kstat_named_setstr(&cpu_info_template.ci_vendorstr,
	    cpuid_getvendorstr(cp));
	cpu_info_template.ci_family.value.l = cpuid_getfamily(cp);
	cpu_info_template.ci_model.value.l = cpuid_getmodel(cp);
	cpu_info_template.ci_step.value.l = cpuid_getstep(cp);
	cpu_info_template.ci_clogid.value.l = cpuid_get_clogid(cp);
	cpu_info_template.ci_ncpuperchip.value.l = cpuid_get_ncpu_per_chip(cp);
	cpu_info_template.ci_ncoreperchip.value.l =
	    cpuid_get_ncore_per_chip(cp);
	cpu_info_template.ci_pkg_core_id.value.l = cpuid_get_pkgcoreid(cp);
	cpu_info_template.ci_max_cstates.value.l = cp->cpu_m.max_cstates;
	cpu_info_template.ci_curr_cstate.value.l = cp->cpu_m.curr_cstate;
#endif

	return (0);
}

static void
cpu_info_kstat_create(cpu_t *cp)
{
	zoneid_t zoneid;

	ASSERT(MUTEX_HELD(&cpu_lock));

	if (pool_pset_enabled())
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = ALL_ZONES;
	if ((cp->cpu_info_kstat = kstat_create_zone("cpu_info", cp->cpu_id,
	    NULL, "misc", KSTAT_TYPE_NAMED,
	    sizeof (cpu_info_template) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL, zoneid)) != NULL) {
		cp->cpu_info_kstat->ks_data_size += 2 * CPU_IDSTRLEN;
#if defined(__sparcv9)
		cp->cpu_info_kstat->ks_data_size +=
		    strlen(cpu_fru_fmri(cp)) + 1;
#endif
#if defined(__x86)
		cp->cpu_info_kstat->ks_data_size += X86_VENDOR_STRLEN;
#endif
		if (cp->cpu_supp_freqs != NULL)
			cp->cpu_info_kstat->ks_data_size +=
			    strlen(cp->cpu_supp_freqs) + 1;
		cp->cpu_info_kstat->ks_lock
= &cpu_info_template_lock; 2300 cp->cpu_info_kstat->ks_data = &cpu_info_template; 2301 cp->cpu_info_kstat->ks_private = cp; 2302 cp->cpu_info_kstat->ks_update = cpu_info_kstat_update; 2303 kstat_install(cp->cpu_info_kstat); 2304 } 2305 } 2306 2307 static void 2308 cpu_info_kstat_destroy(cpu_t *cp) 2309 { 2310 ASSERT(MUTEX_HELD(&cpu_lock)); 2311 2312 kstat_delete(cp->cpu_info_kstat); 2313 cp->cpu_info_kstat = NULL; 2314 } 2315 2316 /* 2317 * Create and install kstats for the boot CPU. 2318 */ 2319 void 2320 cpu_kstat_init(cpu_t *cp) 2321 { 2322 mutex_enter(&cpu_lock); 2323 cpu_info_kstat_create(cp); 2324 cpu_stats_kstat_create(cp); 2325 cpu_create_intrstat(cp); 2326 cpu_set_state(cp); 2327 mutex_exit(&cpu_lock); 2328 } 2329 2330 /* 2331 * Make visible to the zone that subset of the cpu information that would be 2332 * initialized when a cpu is configured (but still offline). 2333 */ 2334 void 2335 cpu_visibility_configure(cpu_t *cp, zone_t *zone) 2336 { 2337 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2338 2339 ASSERT(MUTEX_HELD(&cpu_lock)); 2340 ASSERT(pool_pset_enabled()); 2341 ASSERT(cp != NULL); 2342 2343 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2344 zone->zone_ncpus++; 2345 ASSERT(zone->zone_ncpus <= ncpus); 2346 } 2347 if (cp->cpu_info_kstat != NULL) 2348 kstat_zone_add(cp->cpu_info_kstat, zoneid); 2349 } 2350 2351 /* 2352 * Make visible to the zone that subset of the cpu information that would be 2353 * initialized when a previously configured cpu is onlined. 2354 */ 2355 void 2356 cpu_visibility_online(cpu_t *cp, zone_t *zone) 2357 { 2358 kstat_t *ksp; 2359 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */ 2360 zoneid_t zoneid = zone ? 
zone->zone_id : ALL_ZONES; 2361 processorid_t cpun; 2362 2363 ASSERT(MUTEX_HELD(&cpu_lock)); 2364 ASSERT(pool_pset_enabled()); 2365 ASSERT(cp != NULL); 2366 ASSERT(cpu_is_active(cp)); 2367 2368 cpun = cp->cpu_id; 2369 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2370 zone->zone_ncpus_online++; 2371 ASSERT(zone->zone_ncpus_online <= ncpus_online); 2372 } 2373 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun); 2374 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES)) 2375 != NULL) { 2376 kstat_zone_add(ksp, zoneid); 2377 kstat_rele(ksp); 2378 } 2379 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) { 2380 kstat_zone_add(ksp, zoneid); 2381 kstat_rele(ksp); 2382 } 2383 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) { 2384 kstat_zone_add(ksp, zoneid); 2385 kstat_rele(ksp); 2386 } 2387 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) != 2388 NULL) { 2389 kstat_zone_add(ksp, zoneid); 2390 kstat_rele(ksp); 2391 } 2392 } 2393 2394 /* 2395 * Update relevant kstats such that cpu is now visible to processes 2396 * executing in specified zone. 2397 */ 2398 void 2399 cpu_visibility_add(cpu_t *cp, zone_t *zone) 2400 { 2401 cpu_visibility_configure(cp, zone); 2402 if (cpu_is_active(cp)) 2403 cpu_visibility_online(cp, zone); 2404 } 2405 2406 /* 2407 * Make invisible to the zone that subset of the cpu information that would be 2408 * torn down when a previously offlined cpu is unconfigured. 2409 */ 2410 void 2411 cpu_visibility_unconfigure(cpu_t *cp, zone_t *zone) 2412 { 2413 zoneid_t zoneid = zone ? 
zone->zone_id : ALL_ZONES; 2414 2415 ASSERT(MUTEX_HELD(&cpu_lock)); 2416 ASSERT(pool_pset_enabled()); 2417 ASSERT(cp != NULL); 2418 2419 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2420 ASSERT(zone->zone_ncpus != 0); 2421 zone->zone_ncpus--; 2422 } 2423 if (cp->cpu_info_kstat) 2424 kstat_zone_remove(cp->cpu_info_kstat, zoneid); 2425 } 2426 2427 /* 2428 * Make invisible to the zone that subset of the cpu information that would be 2429 * torn down when a cpu is offlined (but still configured). 2430 */ 2431 void 2432 cpu_visibility_offline(cpu_t *cp, zone_t *zone) 2433 { 2434 kstat_t *ksp; 2435 char name[sizeof ("cpu_stat") + 10]; /* enough for 32-bit cpuids */ 2436 zoneid_t zoneid = zone ? zone->zone_id : ALL_ZONES; 2437 processorid_t cpun; 2438 2439 ASSERT(MUTEX_HELD(&cpu_lock)); 2440 ASSERT(pool_pset_enabled()); 2441 ASSERT(cp != NULL); 2442 ASSERT(cpu_is_active(cp)); 2443 2444 cpun = cp->cpu_id; 2445 if (zoneid != ALL_ZONES && zoneid != GLOBAL_ZONEID) { 2446 ASSERT(zone->zone_ncpus_online != 0); 2447 zone->zone_ncpus_online--; 2448 } 2449 2450 if ((ksp = kstat_hold_byname("cpu", cpun, "intrstat", ALL_ZONES)) != 2451 NULL) { 2452 kstat_zone_remove(ksp, zoneid); 2453 kstat_rele(ksp); 2454 } 2455 if ((ksp = kstat_hold_byname("cpu", cpun, "vm", ALL_ZONES)) != NULL) { 2456 kstat_zone_remove(ksp, zoneid); 2457 kstat_rele(ksp); 2458 } 2459 if ((ksp = kstat_hold_byname("cpu", cpun, "sys", ALL_ZONES)) != NULL) { 2460 kstat_zone_remove(ksp, zoneid); 2461 kstat_rele(ksp); 2462 } 2463 (void) snprintf(name, sizeof (name), "cpu_stat%d", cpun); 2464 if ((ksp = kstat_hold_byname("cpu_stat", cpun, name, ALL_ZONES)) 2465 != NULL) { 2466 kstat_zone_remove(ksp, zoneid); 2467 kstat_rele(ksp); 2468 } 2469 } 2470 2471 /* 2472 * Update relevant kstats such that cpu is no longer visible to processes 2473 * executing in specified zone. 
2474 */ 2475 void 2476 cpu_visibility_remove(cpu_t *cp, zone_t *zone) 2477 { 2478 if (cpu_is_active(cp)) 2479 cpu_visibility_offline(cp, zone); 2480 cpu_visibility_unconfigure(cp, zone); 2481 } 2482 2483 /* 2484 * Bind a thread to a CPU as requested. 2485 */ 2486 int 2487 cpu_bind_thread(kthread_id_t tp, processorid_t bind, processorid_t *obind, 2488 int *error) 2489 { 2490 processorid_t binding; 2491 cpu_t *cp = NULL; 2492 2493 ASSERT(MUTEX_HELD(&cpu_lock)); 2494 ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock)); 2495 2496 thread_lock(tp); 2497 2498 /* 2499 * Record old binding, but change the obind, which was initialized 2500 * to PBIND_NONE, only if this thread has a binding. This avoids 2501 * reporting PBIND_NONE for a process when some LWPs are bound. 2502 */ 2503 binding = tp->t_bind_cpu; 2504 if (binding != PBIND_NONE) 2505 *obind = binding; /* record old binding */ 2506 2507 switch (bind) { 2508 case PBIND_QUERY: 2509 /* Just return the old binding */ 2510 thread_unlock(tp); 2511 return (0); 2512 2513 case PBIND_QUERY_TYPE: 2514 /* Return the binding type */ 2515 *obind = TB_CPU_IS_SOFT(tp) ? PBIND_SOFT : PBIND_HARD; 2516 thread_unlock(tp); 2517 return (0); 2518 2519 case PBIND_SOFT: 2520 /* 2521 * Set soft binding for this thread and return the actual 2522 * binding 2523 */ 2524 TB_CPU_SOFT_SET(tp); 2525 thread_unlock(tp); 2526 return (0); 2527 2528 case PBIND_HARD: 2529 /* 2530 * Set hard binding for this thread and return the actual 2531 * binding 2532 */ 2533 TB_CPU_HARD_SET(tp); 2534 thread_unlock(tp); 2535 return (0); 2536 2537 default: 2538 break; 2539 } 2540 2541 /* 2542 * If this thread/LWP cannot be bound because of permission 2543 * problems, just note that and return success so that the 2544 * other threads/LWPs will be bound. This is the way 2545 * processor_bind() is defined to work. 2546 * 2547 * Binding will get EPERM if the thread is of system class 2548 * or hasprocperm() fails. 
2549 */ 2550 if (tp->t_cid == 0 || !hasprocperm(tp->t_cred, CRED())) { 2551 *error = EPERM; 2552 thread_unlock(tp); 2553 return (0); 2554 } 2555 2556 binding = bind; 2557 if (binding != PBIND_NONE) { 2558 cp = cpu_get((processorid_t)binding); 2559 /* 2560 * Make sure binding is valid and is in right partition. 2561 */ 2562 if (cp == NULL || tp->t_cpupart != cp->cpu_part) { 2563 *error = EINVAL; 2564 thread_unlock(tp); 2565 return (0); 2566 } 2567 } 2568 tp->t_bind_cpu = binding; /* set new binding */ 2569 2570 /* 2571 * If there is no system-set reason for affinity, set 2572 * the t_bound_cpu field to reflect the binding. 2573 */ 2574 if (tp->t_affinitycnt == 0) { 2575 if (binding == PBIND_NONE) { 2576 /* 2577 * We may need to adjust disp_max_unbound_pri 2578 * since we're becoming unbound. 2579 */ 2580 disp_adjust_unbound_pri(tp); 2581 2582 tp->t_bound_cpu = NULL; /* set new binding */ 2583 2584 /* 2585 * Move thread to lgroup with strongest affinity 2586 * after unbinding 2587 */ 2588 if (tp->t_lgrp_affinity) 2589 lgrp_move_thread(tp, 2590 lgrp_choose(tp, tp->t_cpupart), 1); 2591 2592 if (tp->t_state == TS_ONPROC && 2593 tp->t_cpu->cpu_part != tp->t_cpupart) 2594 cpu_surrender(tp); 2595 } else { 2596 lpl_t *lpl; 2597 2598 tp->t_bound_cpu = cp; 2599 ASSERT(cp->cpu_lpl != NULL); 2600 2601 /* 2602 * Set home to lgroup with most affinity containing CPU 2603 * that thread is being bound or minimum bounding 2604 * lgroup if no affinities set 2605 */ 2606 if (tp->t_lgrp_affinity) 2607 lpl = lgrp_affinity_best(tp, tp->t_cpupart, 2608 LGRP_NONE, B_FALSE); 2609 else 2610 lpl = cp->cpu_lpl; 2611 2612 if (tp->t_lpl != lpl) { 2613 /* can't grab cpu_lock */ 2614 lgrp_move_thread(tp, lpl, 1); 2615 } 2616 2617 /* 2618 * Make the thread switch to the bound CPU. 2619 * If the thread is runnable, we need to 2620 * requeue it even if t_cpu is already set 2621 * to the right CPU, since it may be on a 2622 * kpreempt queue and need to move to a local 2623 * queue. 
We could check t_disp_queue to 2624 * avoid unnecessary overhead if it's already 2625 * on the right queue, but since this isn't 2626 * a performance-critical operation it doesn't 2627 * seem worth the extra code and complexity. 2628 * 2629 * If the thread is weakbound to the cpu then it will 2630 * resist the new binding request until the weak 2631 * binding drops. The cpu_surrender or requeueing 2632 * below could be skipped in such cases (since it 2633 * will have no effect), but that would require 2634 * thread_allowmigrate to acquire thread_lock so 2635 * we'll take the very occasional hit here instead. 2636 */ 2637 if (tp->t_state == TS_ONPROC) { 2638 cpu_surrender(tp); 2639 } else if (tp->t_state == TS_RUN) { 2640 cpu_t *ocp = tp->t_cpu; 2641 2642 (void) dispdeq(tp); 2643 setbackdq(tp); 2644 /* 2645 * Either on the bound CPU's disp queue now, 2646 * or swapped out or on the swap queue. 2647 */ 2648 ASSERT(tp->t_disp_queue == cp->cpu_disp || 2649 tp->t_weakbound_cpu == ocp || 2650 (tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) 2651 != TS_LOAD); 2652 } 2653 } 2654 } 2655 2656 /* 2657 * Our binding has changed; set TP_CHANGEBIND. 2658 */ 2659 tp->t_proc_flag |= TP_CHANGEBIND; 2660 aston(tp); 2661 2662 thread_unlock(tp); 2663 2664 return (0); 2665 } 2666 2667 #if CPUSET_WORDS > 1 2668 2669 /* 2670 * Functions for implementing cpuset operations when a cpuset is more 2671 * than one word. On platforms where a cpuset is a single word these 2672 * are implemented as macros in cpuvar.h. 
2673 */ 2674 2675 void 2676 cpuset_all(cpuset_t *s) 2677 { 2678 int i; 2679 2680 for (i = 0; i < CPUSET_WORDS; i++) 2681 s->cpub[i] = ~0UL; 2682 } 2683 2684 void 2685 cpuset_all_but(cpuset_t *s, uint_t cpu) 2686 { 2687 cpuset_all(s); 2688 CPUSET_DEL(*s, cpu); 2689 } 2690 2691 void 2692 cpuset_only(cpuset_t *s, uint_t cpu) 2693 { 2694 CPUSET_ZERO(*s); 2695 CPUSET_ADD(*s, cpu); 2696 } 2697 2698 int 2699 cpuset_isnull(cpuset_t *s) 2700 { 2701 int i; 2702 2703 for (i = 0; i < CPUSET_WORDS; i++) 2704 if (s->cpub[i] != 0) 2705 return (0); 2706 return (1); 2707 } 2708 2709 int 2710 cpuset_cmp(cpuset_t *s1, cpuset_t *s2) 2711 { 2712 int i; 2713 2714 for (i = 0; i < CPUSET_WORDS; i++) 2715 if (s1->cpub[i] != s2->cpub[i]) 2716 return (0); 2717 return (1); 2718 } 2719 2720 uint_t 2721 cpuset_find(cpuset_t *s) 2722 { 2723 2724 uint_t i; 2725 uint_t cpu = (uint_t)-1; 2726 2727 /* 2728 * Find a cpu in the cpuset 2729 */ 2730 for (i = 0; i < CPUSET_WORDS; i++) { 2731 cpu = (uint_t)(lowbit(s->cpub[i]) - 1); 2732 if (cpu != (uint_t)-1) { 2733 cpu += i * BT_NBIPUL; 2734 break; 2735 } 2736 } 2737 return (cpu); 2738 } 2739 2740 void 2741 cpuset_bounds(cpuset_t *s, uint_t *smallestid, uint_t *largestid) 2742 { 2743 int i, j; 2744 uint_t bit; 2745 2746 /* 2747 * First, find the smallest cpu id in the set. 2748 */ 2749 for (i = 0; i < CPUSET_WORDS; i++) { 2750 if (s->cpub[i] != 0) { 2751 bit = (uint_t)(lowbit(s->cpub[i]) - 1); 2752 ASSERT(bit != (uint_t)-1); 2753 *smallestid = bit + (i * BT_NBIPUL); 2754 2755 /* 2756 * Now find the largest cpu id in 2757 * the set and return immediately. 2758 * Done in an inner loop to avoid 2759 * having to break out of the first 2760 * loop. 
2761 */ 2762 for (j = CPUSET_WORDS - 1; j >= i; j--) { 2763 if (s->cpub[j] != 0) { 2764 bit = (uint_t)(highbit(s->cpub[j]) - 1); 2765 ASSERT(bit != (uint_t)-1); 2766 *largestid = bit + (j * BT_NBIPUL); 2767 ASSERT(*largestid >= *smallestid); 2768 return; 2769 } 2770 } 2771 2772 /* 2773 * If this code is reached, a 2774 * smallestid was found, but not a 2775 * largestid. The cpuset must have 2776 * been changed during the course 2777 * of this function call. 2778 */ 2779 ASSERT(0); 2780 } 2781 } 2782 *smallestid = *largestid = CPUSET_NOTINSET; 2783 } 2784 2785 #endif /* CPUSET_WORDS */ 2786 2787 /* 2788 * Unbind threads bound to specified CPU. 2789 * 2790 * If `unbind_all_threads' is true, unbind all user threads bound to a given 2791 * CPU. Otherwise unbind all soft-bound user threads. 2792 */ 2793 int 2794 cpu_unbind(processorid_t cpu, boolean_t unbind_all_threads) 2795 { 2796 processorid_t obind; 2797 kthread_t *tp; 2798 int ret = 0; 2799 proc_t *pp; 2800 int err, berr = 0; 2801 2802 ASSERT(MUTEX_HELD(&cpu_lock)); 2803 2804 mutex_enter(&pidlock); 2805 for (pp = practive; pp != NULL; pp = pp->p_next) { 2806 mutex_enter(&pp->p_lock); 2807 tp = pp->p_tlist; 2808 /* 2809 * Skip zombies, kernel processes, and processes in 2810 * other zones, if called from a non-global zone. 2811 */ 2812 if (tp == NULL || (pp->p_flag & SSYS) || 2813 !HASZONEACCESS(curproc, pp->p_zone->zone_id)) { 2814 mutex_exit(&pp->p_lock); 2815 continue; 2816 } 2817 do { 2818 if (tp->t_bind_cpu != cpu) 2819 continue; 2820 /* 2821 * Skip threads with hard binding when 2822 * `unbind_all_threads' is not specified. 
2823 */ 2824 if (!unbind_all_threads && TB_CPU_IS_HARD(tp)) 2825 continue; 2826 err = cpu_bind_thread(tp, PBIND_NONE, &obind, &berr); 2827 if (ret == 0) 2828 ret = err; 2829 } while ((tp = tp->t_forw) != pp->p_tlist); 2830 mutex_exit(&pp->p_lock); 2831 } 2832 mutex_exit(&pidlock); 2833 if (ret == 0) 2834 ret = berr; 2835 return (ret); 2836 } 2837 2838 2839 /* 2840 * Destroy all remaining bound threads on a cpu. 2841 */ 2842 void 2843 cpu_destroy_bound_threads(cpu_t *cp) 2844 { 2845 extern id_t syscid; 2846 register kthread_id_t t, tlist, tnext; 2847 2848 /* 2849 * Destroy all remaining bound threads on the cpu. This 2850 * should include both the interrupt threads and the idle thread. 2851 * This requires some care, since we need to traverse the 2852 * thread list with the pidlock mutex locked, but thread_free 2853 * also locks the pidlock mutex. So, we collect the threads 2854 * we're going to reap in a list headed by "tlist", then we 2855 * unlock the pidlock mutex and traverse the tlist list, 2856 * doing thread_free's on the thread's. Simple, n'est pas? 2857 * Also, this depends on thread_free not mucking with the 2858 * t_next and t_prev links of the thread. 2859 */ 2860 2861 if ((t = curthread) != NULL) { 2862 2863 tlist = NULL; 2864 mutex_enter(&pidlock); 2865 do { 2866 tnext = t->t_next; 2867 if (t->t_bound_cpu == cp) { 2868 2869 /* 2870 * We've found a bound thread, carefully unlink 2871 * it out of the thread list, and add it to 2872 * our "tlist". We "know" we don't have to 2873 * worry about unlinking curthread (the thread 2874 * that is executing this code). 2875 */ 2876 t->t_next->t_prev = t->t_prev; 2877 t->t_prev->t_next = t->t_next; 2878 t->t_next = tlist; 2879 tlist = t; 2880 ASSERT(t->t_cid == syscid); 2881 /* wake up anyone blocked in thread_join */ 2882 cv_broadcast(&t->t_joincv); 2883 /* 2884 * t_lwp set by interrupt threads and not 2885 * cleared. 
2886 */ 2887 t->t_lwp = NULL; 2888 /* 2889 * Pause and idle threads always have 2890 * t_state set to TS_ONPROC. 2891 */ 2892 t->t_state = TS_FREE; 2893 t->t_prev = NULL; /* Just in case */ 2894 } 2895 2896 } while ((t = tnext) != curthread); 2897 2898 mutex_exit(&pidlock); 2899 2900 mutex_sync(); 2901 for (t = tlist; t != NULL; t = tnext) { 2902 tnext = t->t_next; 2903 thread_free(t); 2904 } 2905 } 2906 } 2907 2908 /* 2909 * Update the cpu_supp_freqs of this cpu. This information is returned 2910 * as part of cpu_info kstats. If the cpu_info_kstat exists already, then 2911 * maintain the kstat data size. 2912 */ 2913 void 2914 cpu_set_supp_freqs(cpu_t *cp, const char *freqs) 2915 { 2916 char clkstr[sizeof ("18446744073709551615") + 1]; /* ui64 MAX */ 2917 const char *lfreqs = clkstr; 2918 boolean_t kstat_exists = B_FALSE; 2919 kstat_t *ksp; 2920 size_t len; 2921 2922 /* 2923 * A NULL pointer means we only support one speed. 2924 */ 2925 if (freqs == NULL) 2926 (void) snprintf(clkstr, sizeof (clkstr), "%"PRIu64, 2927 cp->cpu_curr_clock); 2928 else 2929 lfreqs = freqs; 2930 2931 /* 2932 * Make sure the frequency doesn't change while a snapshot is 2933 * going on. Of course, we only need to worry about this if 2934 * the kstat exists. 2935 */ 2936 if ((ksp = cp->cpu_info_kstat) != NULL) { 2937 mutex_enter(ksp->ks_lock); 2938 kstat_exists = B_TRUE; 2939 } 2940 2941 /* 2942 * Free any previously allocated string and if the kstat 2943 * already exists, then update its data size. 2944 */ 2945 if (cp->cpu_supp_freqs != NULL) { 2946 len = strlen(cp->cpu_supp_freqs) + 1; 2947 kmem_free(cp->cpu_supp_freqs, len); 2948 if (kstat_exists) 2949 ksp->ks_data_size -= len; 2950 } 2951 2952 /* 2953 * Allocate the new string and set the pointer. 2954 */ 2955 len = strlen(lfreqs) + 1; 2956 cp->cpu_supp_freqs = kmem_alloc(len, KM_SLEEP); 2957 (void) strcpy(cp->cpu_supp_freqs, lfreqs); 2958 2959 /* 2960 * If the kstat already exists then update the data size and 2961 * free the lock. 
2962 */ 2963 if (kstat_exists) { 2964 ksp->ks_data_size += len; 2965 mutex_exit(ksp->ks_lock); 2966 } 2967 } 2968 2969 /* 2970 * Indicate the current CPU's clock freqency (in Hz). 2971 * The calling context must be such that CPU references are safe. 2972 */ 2973 void 2974 cpu_set_curr_clock(uint64_t new_clk) 2975 { 2976 uint64_t old_clk; 2977 2978 old_clk = CPU->cpu_curr_clock; 2979 CPU->cpu_curr_clock = new_clk; 2980 2981 /* 2982 * The cpu-change-speed DTrace probe exports the frequency in Hz 2983 */ 2984 DTRACE_PROBE3(cpu__change__speed, processorid_t, CPU->cpu_id, 2985 uint64_t, old_clk, uint64_t, new_clk); 2986 } 2987 2988 /* 2989 * processor_info(2) and p_online(2) status support functions 2990 * The constants returned by the cpu_get_state() and cpu_get_state_str() are 2991 * for use in communicating processor state information to userland. Kernel 2992 * subsystems should only be using the cpu_flags value directly. Subsystems 2993 * modifying cpu_flags should record the state change via a call to the 2994 * cpu_set_state(). 2995 */ 2996 2997 /* 2998 * Update the pi_state of this CPU. This function provides the CPU status for 2999 * the information returned by processor_info(2). 3000 */ 3001 void 3002 cpu_set_state(cpu_t *cpu) 3003 { 3004 ASSERT(MUTEX_HELD(&cpu_lock)); 3005 cpu->cpu_type_info.pi_state = cpu_get_state(cpu); 3006 cpu->cpu_state_begin = gethrestime_sec(); 3007 pool_cpu_mod = gethrtime(); 3008 } 3009 3010 /* 3011 * Return offline/online/other status for the indicated CPU. Use only for 3012 * communication with user applications; cpu_flags provides the in-kernel 3013 * interface. 
3014 */ 3015 int 3016 cpu_get_state(cpu_t *cpu) 3017 { 3018 ASSERT(MUTEX_HELD(&cpu_lock)); 3019 if (cpu->cpu_flags & CPU_POWEROFF) 3020 return (P_POWEROFF); 3021 else if (cpu->cpu_flags & CPU_FAULTED) 3022 return (P_FAULTED); 3023 else if (cpu->cpu_flags & CPU_SPARE) 3024 return (P_SPARE); 3025 else if ((cpu->cpu_flags & (CPU_READY | CPU_OFFLINE)) != CPU_READY) 3026 return (P_OFFLINE); 3027 else if (cpu->cpu_flags & CPU_ENABLE) 3028 return (P_ONLINE); 3029 else 3030 return (P_NOINTR); 3031 } 3032 3033 /* 3034 * Return processor_info(2) state as a string. 3035 */ 3036 const char * 3037 cpu_get_state_str(cpu_t *cpu) 3038 { 3039 const char *string; 3040 3041 switch (cpu_get_state(cpu)) { 3042 case P_ONLINE: 3043 string = PS_ONLINE; 3044 break; 3045 case P_POWEROFF: 3046 string = PS_POWEROFF; 3047 break; 3048 case P_NOINTR: 3049 string = PS_NOINTR; 3050 break; 3051 case P_SPARE: 3052 string = PS_SPARE; 3053 break; 3054 case P_FAULTED: 3055 string = PS_FAULTED; 3056 break; 3057 case P_OFFLINE: 3058 string = PS_OFFLINE; 3059 break; 3060 default: 3061 string = "unknown"; 3062 break; 3063 } 3064 return (string); 3065 } 3066 3067 /* 3068 * Export this CPU's statistics (cpu_stat_t and cpu_stats_t) as raw and named 3069 * kstats, respectively. This is done when a CPU is initialized or placed 3070 * online via p_online(2). 
3071 */ 3072 static void 3073 cpu_stats_kstat_create(cpu_t *cp) 3074 { 3075 int instance = cp->cpu_id; 3076 char *module = "cpu"; 3077 char *class = "misc"; 3078 kstat_t *ksp; 3079 zoneid_t zoneid; 3080 3081 ASSERT(MUTEX_HELD(&cpu_lock)); 3082 3083 if (pool_pset_enabled()) 3084 zoneid = GLOBAL_ZONEID; 3085 else 3086 zoneid = ALL_ZONES; 3087 /* 3088 * Create named kstats 3089 */ 3090 #define CPU_STATS_KS_CREATE(name, tsize, update_func) \ 3091 ksp = kstat_create_zone(module, instance, (name), class, \ 3092 KSTAT_TYPE_NAMED, (tsize) / sizeof (kstat_named_t), 0, \ 3093 zoneid); \ 3094 if (ksp != NULL) { \ 3095 ksp->ks_private = cp; \ 3096 ksp->ks_update = (update_func); \ 3097 kstat_install(ksp); \ 3098 } else \ 3099 cmn_err(CE_WARN, "cpu: unable to create %s:%d:%s kstat", \ 3100 module, instance, (name)); 3101 3102 CPU_STATS_KS_CREATE("sys", sizeof (cpu_sys_stats_ks_data_template), 3103 cpu_sys_stats_ks_update); 3104 CPU_STATS_KS_CREATE("vm", sizeof (cpu_vm_stats_ks_data_template), 3105 cpu_vm_stats_ks_update); 3106 3107 /* 3108 * Export the familiar cpu_stat_t KSTAT_TYPE_RAW kstat. 
3109 */ 3110 ksp = kstat_create_zone("cpu_stat", cp->cpu_id, NULL, 3111 "misc", KSTAT_TYPE_RAW, sizeof (cpu_stat_t), 0, zoneid); 3112 if (ksp != NULL) { 3113 ksp->ks_update = cpu_stat_ks_update; 3114 ksp->ks_private = cp; 3115 kstat_install(ksp); 3116 } 3117 } 3118 3119 static void 3120 cpu_stats_kstat_destroy(cpu_t *cp) 3121 { 3122 char ks_name[KSTAT_STRLEN]; 3123 3124 (void) sprintf(ks_name, "cpu_stat%d", cp->cpu_id); 3125 kstat_delete_byname("cpu_stat", cp->cpu_id, ks_name); 3126 3127 kstat_delete_byname("cpu", cp->cpu_id, "sys"); 3128 kstat_delete_byname("cpu", cp->cpu_id, "vm"); 3129 } 3130 3131 static int 3132 cpu_sys_stats_ks_update(kstat_t *ksp, int rw) 3133 { 3134 cpu_t *cp = (cpu_t *)ksp->ks_private; 3135 struct cpu_sys_stats_ks_data *csskd; 3136 cpu_sys_stats_t *css; 3137 hrtime_t msnsecs[NCMSTATES]; 3138 int i; 3139 3140 if (rw == KSTAT_WRITE) 3141 return (EACCES); 3142 3143 csskd = ksp->ks_data; 3144 css = &cp->cpu_stats.sys; 3145 3146 /* 3147 * Read CPU mstate, but compare with the last values we 3148 * received to make sure that the returned kstats never 3149 * decrease. 
3150 */ 3151 3152 get_cpu_mstate(cp, msnsecs); 3153 if (csskd->cpu_nsec_idle.value.ui64 > msnsecs[CMS_IDLE]) 3154 msnsecs[CMS_IDLE] = csskd->cpu_nsec_idle.value.ui64; 3155 if (csskd->cpu_nsec_user.value.ui64 > msnsecs[CMS_USER]) 3156 msnsecs[CMS_USER] = csskd->cpu_nsec_user.value.ui64; 3157 if (csskd->cpu_nsec_kernel.value.ui64 > msnsecs[CMS_SYSTEM]) 3158 msnsecs[CMS_SYSTEM] = csskd->cpu_nsec_kernel.value.ui64; 3159 3160 bcopy(&cpu_sys_stats_ks_data_template, ksp->ks_data, 3161 sizeof (cpu_sys_stats_ks_data_template)); 3162 3163 csskd->cpu_ticks_wait.value.ui64 = 0; 3164 csskd->wait_ticks_io.value.ui64 = 0; 3165 3166 csskd->cpu_nsec_idle.value.ui64 = msnsecs[CMS_IDLE]; 3167 csskd->cpu_nsec_user.value.ui64 = msnsecs[CMS_USER]; 3168 csskd->cpu_nsec_kernel.value.ui64 = msnsecs[CMS_SYSTEM]; 3169 csskd->cpu_ticks_idle.value.ui64 = 3170 NSEC_TO_TICK(csskd->cpu_nsec_idle.value.ui64); 3171 csskd->cpu_ticks_user.value.ui64 = 3172 NSEC_TO_TICK(csskd->cpu_nsec_user.value.ui64); 3173 csskd->cpu_ticks_kernel.value.ui64 = 3174 NSEC_TO_TICK(csskd->cpu_nsec_kernel.value.ui64); 3175 csskd->cpu_nsec_intr.value.ui64 = cp->cpu_intrlast; 3176 csskd->cpu_load_intr.value.ui64 = cp->cpu_intrload; 3177 csskd->bread.value.ui64 = css->bread; 3178 csskd->bwrite.value.ui64 = css->bwrite; 3179 csskd->lread.value.ui64 = css->lread; 3180 csskd->lwrite.value.ui64 = css->lwrite; 3181 csskd->phread.value.ui64 = css->phread; 3182 csskd->phwrite.value.ui64 = css->phwrite; 3183 csskd->pswitch.value.ui64 = css->pswitch; 3184 csskd->trap.value.ui64 = css->trap; 3185 csskd->intr.value.ui64 = 0; 3186 for (i = 0; i < PIL_MAX; i++) 3187 csskd->intr.value.ui64 += css->intr[i]; 3188 csskd->syscall.value.ui64 = css->syscall; 3189 csskd->sysread.value.ui64 = css->sysread; 3190 csskd->syswrite.value.ui64 = css->syswrite; 3191 csskd->sysfork.value.ui64 = css->sysfork; 3192 csskd->sysvfork.value.ui64 = css->sysvfork; 3193 csskd->sysexec.value.ui64 = css->sysexec; 3194 csskd->readch.value.ui64 = css->readch; 3195 
csskd->writech.value.ui64 = css->writech; 3196 csskd->rcvint.value.ui64 = css->rcvint; 3197 csskd->xmtint.value.ui64 = css->xmtint; 3198 csskd->mdmint.value.ui64 = css->mdmint; 3199 csskd->rawch.value.ui64 = css->rawch; 3200 csskd->canch.value.ui64 = css->canch; 3201 csskd->outch.value.ui64 = css->outch; 3202 csskd->msg.value.ui64 = css->msg; 3203 csskd->sema.value.ui64 = css->sema; 3204 csskd->namei.value.ui64 = css->namei; 3205 csskd->ufsiget.value.ui64 = css->ufsiget; 3206 csskd->ufsdirblk.value.ui64 = css->ufsdirblk; 3207 csskd->ufsipage.value.ui64 = css->ufsipage; 3208 csskd->ufsinopage.value.ui64 = css->ufsinopage; 3209 csskd->procovf.value.ui64 = css->procovf; 3210 csskd->intrthread.value.ui64 = 0; 3211 for (i = 0; i < LOCK_LEVEL - 1; i++) 3212 csskd->intrthread.value.ui64 += css->intr[i]; 3213 csskd->intrblk.value.ui64 = css->intrblk; 3214 csskd->intrunpin.value.ui64 = css->intrunpin; 3215 csskd->idlethread.value.ui64 = css->idlethread; 3216 csskd->inv_swtch.value.ui64 = css->inv_swtch; 3217 csskd->nthreads.value.ui64 = css->nthreads; 3218 csskd->cpumigrate.value.ui64 = css->cpumigrate; 3219 csskd->xcalls.value.ui64 = css->xcalls; 3220 csskd->mutex_adenters.value.ui64 = css->mutex_adenters; 3221 csskd->rw_rdfails.value.ui64 = css->rw_rdfails; 3222 csskd->rw_wrfails.value.ui64 = css->rw_wrfails; 3223 csskd->modload.value.ui64 = css->modload; 3224 csskd->modunload.value.ui64 = css->modunload; 3225 csskd->bawrite.value.ui64 = css->bawrite; 3226 csskd->iowait.value.ui64 = css->iowait; 3227 3228 return (0); 3229 } 3230 3231 static int 3232 cpu_vm_stats_ks_update(kstat_t *ksp, int rw) 3233 { 3234 cpu_t *cp = (cpu_t *)ksp->ks_private; 3235 struct cpu_vm_stats_ks_data *cvskd; 3236 cpu_vm_stats_t *cvs; 3237 3238 if (rw == KSTAT_WRITE) 3239 return (EACCES); 3240 3241 cvs = &cp->cpu_stats.vm; 3242 cvskd = ksp->ks_data; 3243 3244 bcopy(&cpu_vm_stats_ks_data_template, ksp->ks_data, 3245 sizeof (cpu_vm_stats_ks_data_template)); 3246 cvskd->pgrec.value.ui64 = cvs->pgrec; 
3247 cvskd->pgfrec.value.ui64 = cvs->pgfrec; 3248 cvskd->pgin.value.ui64 = cvs->pgin; 3249 cvskd->pgpgin.value.ui64 = cvs->pgpgin; 3250 cvskd->pgout.value.ui64 = cvs->pgout; 3251 cvskd->pgpgout.value.ui64 = cvs->pgpgout; 3252 cvskd->swapin.value.ui64 = cvs->swapin; 3253 cvskd->pgswapin.value.ui64 = cvs->pgswapin; 3254 cvskd->swapout.value.ui64 = cvs->swapout; 3255 cvskd->pgswapout.value.ui64 = cvs->pgswapout; 3256 cvskd->zfod.value.ui64 = cvs->zfod; 3257 cvskd->dfree.value.ui64 = cvs->dfree; 3258 cvskd->scan.value.ui64 = cvs->scan; 3259 cvskd->rev.value.ui64 = cvs->rev; 3260 cvskd->hat_fault.value.ui64 = cvs->hat_fault; 3261 cvskd->as_fault.value.ui64 = cvs->as_fault; 3262 cvskd->maj_fault.value.ui64 = cvs->maj_fault; 3263 cvskd->cow_fault.value.ui64 = cvs->cow_fault; 3264 cvskd->prot_fault.value.ui64 = cvs->prot_fault; 3265 cvskd->softlock.value.ui64 = cvs->softlock; 3266 cvskd->kernel_asflt.value.ui64 = cvs->kernel_asflt; 3267 cvskd->pgrrun.value.ui64 = cvs->pgrrun; 3268 cvskd->execpgin.value.ui64 = cvs->execpgin; 3269 cvskd->execpgout.value.ui64 = cvs->execpgout; 3270 cvskd->execfree.value.ui64 = cvs->execfree; 3271 cvskd->anonpgin.value.ui64 = cvs->anonpgin; 3272 cvskd->anonpgout.value.ui64 = cvs->anonpgout; 3273 cvskd->anonfree.value.ui64 = cvs->anonfree; 3274 cvskd->fspgin.value.ui64 = cvs->fspgin; 3275 cvskd->fspgout.value.ui64 = cvs->fspgout; 3276 cvskd->fsfree.value.ui64 = cvs->fsfree; 3277 3278 return (0); 3279 } 3280 3281 static int 3282 cpu_stat_ks_update(kstat_t *ksp, int rw) 3283 { 3284 cpu_stat_t *cso; 3285 cpu_t *cp; 3286 int i; 3287 hrtime_t msnsecs[NCMSTATES]; 3288 3289 cso = (cpu_stat_t *)ksp->ks_data; 3290 cp = (cpu_t *)ksp->ks_private; 3291 3292 if (rw == KSTAT_WRITE) 3293 return (EACCES); 3294 3295 /* 3296 * Read CPU mstate, but compare with the last values we 3297 * received to make sure that the returned kstats never 3298 * decrease. 
3299 */ 3300 3301 get_cpu_mstate(cp, msnsecs); 3302 msnsecs[CMS_IDLE] = NSEC_TO_TICK(msnsecs[CMS_IDLE]); 3303 msnsecs[CMS_USER] = NSEC_TO_TICK(msnsecs[CMS_USER]); 3304 msnsecs[CMS_SYSTEM] = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]); 3305 if (cso->cpu_sysinfo.cpu[CPU_IDLE] < msnsecs[CMS_IDLE]) 3306 cso->cpu_sysinfo.cpu[CPU_IDLE] = msnsecs[CMS_IDLE]; 3307 if (cso->cpu_sysinfo.cpu[CPU_USER] < msnsecs[CMS_USER]) 3308 cso->cpu_sysinfo.cpu[CPU_USER] = msnsecs[CMS_USER]; 3309 if (cso->cpu_sysinfo.cpu[CPU_KERNEL] < msnsecs[CMS_SYSTEM]) 3310 cso->cpu_sysinfo.cpu[CPU_KERNEL] = msnsecs[CMS_SYSTEM]; 3311 cso->cpu_sysinfo.cpu[CPU_WAIT] = 0; 3312 cso->cpu_sysinfo.wait[W_IO] = 0; 3313 cso->cpu_sysinfo.wait[W_SWAP] = 0; 3314 cso->cpu_sysinfo.wait[W_PIO] = 0; 3315 cso->cpu_sysinfo.bread = CPU_STATS(cp, sys.bread); 3316 cso->cpu_sysinfo.bwrite = CPU_STATS(cp, sys.bwrite); 3317 cso->cpu_sysinfo.lread = CPU_STATS(cp, sys.lread); 3318 cso->cpu_sysinfo.lwrite = CPU_STATS(cp, sys.lwrite); 3319 cso->cpu_sysinfo.phread = CPU_STATS(cp, sys.phread); 3320 cso->cpu_sysinfo.phwrite = CPU_STATS(cp, sys.phwrite); 3321 cso->cpu_sysinfo.pswitch = CPU_STATS(cp, sys.pswitch); 3322 cso->cpu_sysinfo.trap = CPU_STATS(cp, sys.trap); 3323 cso->cpu_sysinfo.intr = 0; 3324 for (i = 0; i < PIL_MAX; i++) 3325 cso->cpu_sysinfo.intr += CPU_STATS(cp, sys.intr[i]); 3326 cso->cpu_sysinfo.syscall = CPU_STATS(cp, sys.syscall); 3327 cso->cpu_sysinfo.sysread = CPU_STATS(cp, sys.sysread); 3328 cso->cpu_sysinfo.syswrite = CPU_STATS(cp, sys.syswrite); 3329 cso->cpu_sysinfo.sysfork = CPU_STATS(cp, sys.sysfork); 3330 cso->cpu_sysinfo.sysvfork = CPU_STATS(cp, sys.sysvfork); 3331 cso->cpu_sysinfo.sysexec = CPU_STATS(cp, sys.sysexec); 3332 cso->cpu_sysinfo.readch = CPU_STATS(cp, sys.readch); 3333 cso->cpu_sysinfo.writech = CPU_STATS(cp, sys.writech); 3334 cso->cpu_sysinfo.rcvint = CPU_STATS(cp, sys.rcvint); 3335 cso->cpu_sysinfo.xmtint = CPU_STATS(cp, sys.xmtint); 3336 cso->cpu_sysinfo.mdmint = CPU_STATS(cp, sys.mdmint); 3337 
cso->cpu_sysinfo.rawch = CPU_STATS(cp, sys.rawch); 3338 cso->cpu_sysinfo.canch = CPU_STATS(cp, sys.canch); 3339 cso->cpu_sysinfo.outch = CPU_STATS(cp, sys.outch); 3340 cso->cpu_sysinfo.msg = CPU_STATS(cp, sys.msg); 3341 cso->cpu_sysinfo.sema = CPU_STATS(cp, sys.sema); 3342 cso->cpu_sysinfo.namei = CPU_STATS(cp, sys.namei); 3343 cso->cpu_sysinfo.ufsiget = CPU_STATS(cp, sys.ufsiget); 3344 cso->cpu_sysinfo.ufsdirblk = CPU_STATS(cp, sys.ufsdirblk); 3345 cso->cpu_sysinfo.ufsipage = CPU_STATS(cp, sys.ufsipage); 3346 cso->cpu_sysinfo.ufsinopage = CPU_STATS(cp, sys.ufsinopage); 3347 cso->cpu_sysinfo.inodeovf = 0; 3348 cso->cpu_sysinfo.fileovf = 0; 3349 cso->cpu_sysinfo.procovf = CPU_STATS(cp, sys.procovf); 3350 cso->cpu_sysinfo.intrthread = 0; 3351 for (i = 0; i < LOCK_LEVEL - 1; i++) 3352 cso->cpu_sysinfo.intrthread += CPU_STATS(cp, sys.intr[i]); 3353 cso->cpu_sysinfo.intrblk = CPU_STATS(cp, sys.intrblk); 3354 cso->cpu_sysinfo.idlethread = CPU_STATS(cp, sys.idlethread); 3355 cso->cpu_sysinfo.inv_swtch = CPU_STATS(cp, sys.inv_swtch); 3356 cso->cpu_sysinfo.nthreads = CPU_STATS(cp, sys.nthreads); 3357 cso->cpu_sysinfo.cpumigrate = CPU_STATS(cp, sys.cpumigrate); 3358 cso->cpu_sysinfo.xcalls = CPU_STATS(cp, sys.xcalls); 3359 cso->cpu_sysinfo.mutex_adenters = CPU_STATS(cp, sys.mutex_adenters); 3360 cso->cpu_sysinfo.rw_rdfails = CPU_STATS(cp, sys.rw_rdfails); 3361 cso->cpu_sysinfo.rw_wrfails = CPU_STATS(cp, sys.rw_wrfails); 3362 cso->cpu_sysinfo.modload = CPU_STATS(cp, sys.modload); 3363 cso->cpu_sysinfo.modunload = CPU_STATS(cp, sys.modunload); 3364 cso->cpu_sysinfo.bawrite = CPU_STATS(cp, sys.bawrite); 3365 cso->cpu_sysinfo.rw_enters = 0; 3366 cso->cpu_sysinfo.win_uo_cnt = 0; 3367 cso->cpu_sysinfo.win_uu_cnt = 0; 3368 cso->cpu_sysinfo.win_so_cnt = 0; 3369 cso->cpu_sysinfo.win_su_cnt = 0; 3370 cso->cpu_sysinfo.win_suo_cnt = 0; 3371 3372 cso->cpu_syswait.iowait = CPU_STATS(cp, sys.iowait); 3373 cso->cpu_syswait.swap = 0; 3374 cso->cpu_syswait.physio = 0; 3375 3376 
cso->cpu_vminfo.pgrec = CPU_STATS(cp, vm.pgrec); 3377 cso->cpu_vminfo.pgfrec = CPU_STATS(cp, vm.pgfrec); 3378 cso->cpu_vminfo.pgin = CPU_STATS(cp, vm.pgin); 3379 cso->cpu_vminfo.pgpgin = CPU_STATS(cp, vm.pgpgin); 3380 cso->cpu_vminfo.pgout = CPU_STATS(cp, vm.pgout); 3381 cso->cpu_vminfo.pgpgout = CPU_STATS(cp, vm.pgpgout); 3382 cso->cpu_vminfo.swapin = CPU_STATS(cp, vm.swapin); 3383 cso->cpu_vminfo.pgswapin = CPU_STATS(cp, vm.pgswapin); 3384 cso->cpu_vminfo.swapout = CPU_STATS(cp, vm.swapout); 3385 cso->cpu_vminfo.pgswapout = CPU_STATS(cp, vm.pgswapout); 3386 cso->cpu_vminfo.zfod = CPU_STATS(cp, vm.zfod); 3387 cso->cpu_vminfo.dfree = CPU_STATS(cp, vm.dfree); 3388 cso->cpu_vminfo.scan = CPU_STATS(cp, vm.scan); 3389 cso->cpu_vminfo.rev = CPU_STATS(cp, vm.rev); 3390 cso->cpu_vminfo.hat_fault = CPU_STATS(cp, vm.hat_fault); 3391 cso->cpu_vminfo.as_fault = CPU_STATS(cp, vm.as_fault); 3392 cso->cpu_vminfo.maj_fault = CPU_STATS(cp, vm.maj_fault); 3393 cso->cpu_vminfo.cow_fault = CPU_STATS(cp, vm.cow_fault); 3394 cso->cpu_vminfo.prot_fault = CPU_STATS(cp, vm.prot_fault); 3395 cso->cpu_vminfo.softlock = CPU_STATS(cp, vm.softlock); 3396 cso->cpu_vminfo.kernel_asflt = CPU_STATS(cp, vm.kernel_asflt); 3397 cso->cpu_vminfo.pgrrun = CPU_STATS(cp, vm.pgrrun); 3398 cso->cpu_vminfo.execpgin = CPU_STATS(cp, vm.execpgin); 3399 cso->cpu_vminfo.execpgout = CPU_STATS(cp, vm.execpgout); 3400 cso->cpu_vminfo.execfree = CPU_STATS(cp, vm.execfree); 3401 cso->cpu_vminfo.anonpgin = CPU_STATS(cp, vm.anonpgin); 3402 cso->cpu_vminfo.anonpgout = CPU_STATS(cp, vm.anonpgout); 3403 cso->cpu_vminfo.anonfree = CPU_STATS(cp, vm.anonfree); 3404 cso->cpu_vminfo.fspgin = CPU_STATS(cp, vm.fspgin); 3405 cso->cpu_vminfo.fspgout = CPU_STATS(cp, vm.fspgout); 3406 cso->cpu_vminfo.fsfree = CPU_STATS(cp, vm.fsfree); 3407 3408 return (0); 3409 } 3410