/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

/*
 * Introduction
 * This file implements a CPU event notification mechanism to signal clients
 * that are interested in CPU-related events.
 * Currently it only supports CPU idle state change events, which are
 * triggered just before a CPU enters a hardware idle state and just after
 * it wakes up from that state.
 * Please refer to PSARC/2009/115 for detailed information.
 *
 * Lock Strategy
 * 1) cpu_idle_prop_busy/free are protected by cpu_idle_prop_lock.
 * 2) No protection for cpu_idle_cb_state because it's per-CPU data.
 * 3) cpu_idle_cb_busy is protected by cpu_idle_cb_lock.
 * 4) cpu_idle_cb_array is protected by pause_cpus/start_cpus logic.
 * 5) cpu_idle_cb_max/curr are protected by both cpu_idle_cb_lock and
 *    pause_cpus/start_cpus logic.
 * The algorithm is optimized for the hot path on the read side, which is
 * lock free.  On the write side, pause_cpus() is used to keep all other
 * CPUs in the pause thread, which guarantees that no other thread will
 * access the cpu_idle_cb_max/curr/array data structures.
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/machcpuvar.h>
#include <sys/sdt.h>
#include <sys/sysmacros.h>
#include <sys/synch.h>
#include <sys/systm.h>
#include <sys/sunddi.h>
#if defined(__sparc)
#include <sys/machsystm.h>
#elif defined(__x86)
#include <sys/archsystm.h>
#endif
#include <sys/cpu_event.h>

/* Define normal state for CPU on different platforms. */
#if defined(__x86)
#define	CPU_IDLE_STATE_NORMAL	IDLE_STATE_C0
#elif defined(__sparc)
/*
 * At the time of this implementation IDLE_STATE_NORMAL is defined
 * in mach_startup.c, and not in a header file.  So if we find it is
 * undefined, then we set it to the value as defined in mach_startup.c.
 * Should it eventually be defined, we will pick it up.
 */
#ifndef	IDLE_STATE_NORMAL
#define	IDLE_STATE_NORMAL	0
#endif
#define	CPU_IDLE_STATE_NORMAL	IDLE_STATE_NORMAL
#endif

/*
 * To improve cache efficiency and avoid cache false sharing, CPU idle
 * properties are grouped into cache lines as below:
 * |     CPU0      |     CPU1      |.........|     CPUn      |
 * | cache line 0  | cache line 1  |.........| cache line n  |
 * | v0 | ... | vm | v0 | ... | vm |.........| v0 | ... | vm |
 * To access the value of property m for CPU n, use the following index:
 * index = seq_id_of_CPUn * CPU_IDLE_VALUE_GROUP_SIZE + m
 */
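/*
 * Illustrative example (assumed sizes, for clarity only): with a 64-byte
 * CPU_CACHE_COHERENCE_SIZE and an 8-byte cpu_idle_prop_value_t,
 * CPU_IDLE_VALUE_GROUP_SIZE would be 8, so property 3 of the CPU with
 * sequence id 2 lives at index 2 * 8 + 3 = 19 in the shared value array.
 */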
#define	CPU_IDLE_VALUE_GROUP_SIZE	\
	(CPU_CACHE_COHERENCE_SIZE / sizeof (cpu_idle_prop_value_t))

/* Get callback context handle for current CPU. */
#define	CPU_IDLE_GET_CTX(cp)		\
	((cpu_idle_callback_context_t)(intptr_t)((cp)->cpu_seqid))

/* Get CPU sequential id from ctx. */
#define	CPU_IDLE_CTX2CPUID(ctx)		((processorid_t)(intptr_t)(ctx))

/* Compute index from callback context handle. */
#define	CPU_IDLE_CTX2IDX(ctx)		\
	(((int)(intptr_t)(ctx)) * CPU_IDLE_VALUE_GROUP_SIZE)

#define	CPU_IDLE_HDL2VALP(hdl, idx)	\
	(&((cpu_idle_prop_impl_t *)(hdl))->value[(idx)])

/*
 * When cpu_idle_cb_array is NULL or full, grow it by
 * CPU_IDLE_ARRAY_CAPACITY_INC entries at a time.  Linear growth is
 * preferred here over exponential growth.
 */
#define	CPU_IDLE_ARRAY_CAPACITY_INC	0x10

typedef struct cpu_idle_prop_impl {
	cpu_idle_prop_value_t		*value;
	struct cpu_idle_prop_impl	*next;
	char				*name;
	cpu_idle_prop_update_t		update;
	void				*private;
	cpu_idle_prop_type_t		type;
	uint32_t			refcnt;
} cpu_idle_prop_impl_t;

typedef struct cpu_idle_prop_item {
	cpu_idle_prop_type_t		type;
	char				*name;
	cpu_idle_prop_update_t		update;
	void				*arg;
	cpu_idle_prop_handle_t		handle;
} cpu_idle_prop_item_t;

/* Structure to maintain registered callbacks in a list. */
typedef struct cpu_idle_cb_impl {
	struct cpu_idle_cb_impl		*next;
	cpu_idle_callback_t		*callback;
	void				*argument;
	int				priority;
} cpu_idle_cb_impl_t;

/*
 * Structure to maintain registered callbacks in priority order, also
 * optimized for cache efficiency on read access.
 */
typedef struct cpu_idle_cb_item {
	cpu_idle_enter_cbfn_t		enter;
	cpu_idle_exit_cbfn_t		exit;
	void				*arg;
	cpu_idle_cb_impl_t		*impl;
} cpu_idle_cb_item_t;

/* Per-CPU state aligned to CPU_CACHE_COHERENCE_SIZE to avoid false sharing. */
typedef union cpu_idle_cb_state {
	struct {
		int			index;
		boolean_t		ready;
		cpu_idle_prop_value_t	*idle_state;
		cpu_idle_prop_value_t	*enter_ts;
		cpu_idle_prop_value_t	*exit_ts;
		cpu_idle_prop_value_t	*last_idle;
		cpu_idle_prop_value_t	*last_busy;
		cpu_idle_prop_value_t	*total_idle;
		cpu_idle_prop_value_t	*total_busy;
		cpu_idle_prop_value_t	*intr_cnt;
	} v;
#ifdef	_LP64
	char	align[2 * CPU_CACHE_COHERENCE_SIZE];
#else
	char	align[CPU_CACHE_COHERENCE_SIZE];
#endif
} cpu_idle_cb_state_t;

static kmutex_t			cpu_idle_prop_lock;
static cpu_idle_prop_impl_t	*cpu_idle_prop_busy = NULL;
static cpu_idle_prop_impl_t	*cpu_idle_prop_free = NULL;

static kmutex_t			cpu_idle_cb_lock;
static cpu_idle_cb_impl_t	*cpu_idle_cb_busy = NULL;
static cpu_idle_cb_item_t	*cpu_idle_cb_array = NULL;
static int			cpu_idle_cb_curr = 0;
static int			cpu_idle_cb_max = 0;

static cpu_idle_cb_state_t	*cpu_idle_cb_state;

static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
    cpu_idle_prop_value_t *valp);

static cpu_idle_prop_item_t cpu_idle_prop_array[] = {
	{
	    CPU_IDLE_PROP_TYPE_INTPTR, CPU_IDLE_PROP_IDLE_STATE,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_ENTER_TIMESTAMP,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_EXIT_TIMESTAMP,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_IDLE_TIME,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_BUSY_TIME,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_IDLE_TIME,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_BUSY_TIME,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_UINT64, CPU_IDLE_PROP_INTERRUPT_COUNT,
	    cpu_idle_prop_update_intr_cnt, NULL, NULL
	},
};

#define	CPU_IDLE_PROP_IDX_IDLE_STATE	0
#define	CPU_IDLE_PROP_IDX_ENTER_TS	1
#define	CPU_IDLE_PROP_IDX_EXIT_TS	2
#define	CPU_IDLE_PROP_IDX_LAST_IDLE	3
#define	CPU_IDLE_PROP_IDX_LAST_BUSY	4
#define	CPU_IDLE_PROP_IDX_TOTAL_IDLE	5
#define	CPU_IDLE_PROP_IDX_TOTAL_BUSY	6
#define	CPU_IDLE_PROP_IDX_INTR_CNT	7

/*ARGSUSED*/
static void
cpu_idle_dtrace_enter(void *arg, cpu_idle_callback_context_t ctx,
    cpu_idle_check_wakeup_t check_func, void *check_arg)
{
	int state;

	state = cpu_idle_prop_get_intptr(
	    cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle, ctx);
	DTRACE_PROBE1(idle__state__transition, uint_t, state);
}

/*ARGSUSED*/
static void
cpu_idle_dtrace_exit(void *arg, cpu_idle_callback_context_t ctx, int flag)
{
	DTRACE_PROBE1(idle__state__transition, uint_t, CPU_IDLE_STATE_NORMAL);
}

static cpu_idle_callback_handle_t cpu_idle_cb_handle_dtrace;
static cpu_idle_callback_t cpu_idle_callback_dtrace = {
	CPU_IDLE_CALLBACK_VERS,
	cpu_idle_dtrace_enter,
	cpu_idle_dtrace_exit,
};

#if defined(__x86) && !defined(__xpv)
extern void tlb_going_idle(void);
extern void tlb_service(void);

static cpu_idle_callback_handle_t cpu_idle_cb_handle_tlb;
static cpu_idle_callback_t cpu_idle_callback_tlb = {
	CPU_IDLE_CALLBACK_VERS,
	(cpu_idle_enter_cbfn_t)tlb_going_idle,
	(cpu_idle_exit_cbfn_t)tlb_service,
};
#endif

void
cpu_event_init(void)
{
	int i, idx;
	size_t sz;
	intptr_t buf;
	cpu_idle_cb_state_t *sp;
	cpu_idle_prop_item_t *ip;

	mutex_init(&cpu_idle_cb_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&cpu_idle_prop_lock, NULL, MUTEX_DRIVER, NULL);

	/* Create internal properties. */
	for (i = 0, ip = cpu_idle_prop_array;
	    i < sizeof (cpu_idle_prop_array) / sizeof (cpu_idle_prop_array[0]);
	    i++, ip++) {
		(void) cpu_idle_prop_create_property(ip->name, ip->type,
		    ip->update, ip->arg, &ip->handle);
		ASSERT(ip->handle != NULL);
	}

	/* Allocate buffer and align to CPU_CACHE_COHERENCE_SIZE. */
	sz = sizeof (cpu_idle_cb_state_t) * max_ncpus;
	sz += CPU_CACHE_COHERENCE_SIZE;
	buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP);
	cpu_idle_cb_state = (cpu_idle_cb_state_t *)P2ROUNDUP(buf,
	    CPU_CACHE_COHERENCE_SIZE);

	/* Cache frequently used property value pointers. */
	for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) {
		idx = CPU_IDLE_CTX2IDX(i);
#define	___INIT_P(f, i)	\
	sp->v.f = CPU_IDLE_HDL2VALP(cpu_idle_prop_array[(i)].handle, idx)
		___INIT_P(idle_state, CPU_IDLE_PROP_IDX_IDLE_STATE);
		___INIT_P(enter_ts, CPU_IDLE_PROP_IDX_ENTER_TS);
		___INIT_P(exit_ts, CPU_IDLE_PROP_IDX_EXIT_TS);
		___INIT_P(last_idle, CPU_IDLE_PROP_IDX_LAST_IDLE);
		___INIT_P(last_busy, CPU_IDLE_PROP_IDX_LAST_BUSY);
		___INIT_P(total_idle, CPU_IDLE_PROP_IDX_TOTAL_IDLE);
		___INIT_P(total_busy, CPU_IDLE_PROP_IDX_TOTAL_BUSY);
		___INIT_P(intr_cnt, CPU_IDLE_PROP_IDX_INTR_CNT);
#undef	___INIT_P
	}

	/* Register built-in callbacks. */
	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DTRACE,
	    &cpu_idle_callback_dtrace, NULL, &cpu_idle_cb_handle_dtrace) != 0) {
		cmn_err(CE_PANIC,
		    "cpu_idle: failed to register callback for dtrace.");
	}
#if defined(__x86) && !defined(__xpv)
	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_TLB,
	    &cpu_idle_callback_tlb, NULL, &cpu_idle_cb_handle_tlb) != 0) {
		cmn_err(CE_PANIC,
		    "cpu_idle: failed to register callback for tlb_flush.");
	}
#endif
}

void
cpu_event_init_cpu(cpu_t *cp)
{
	ASSERT(cp->cpu_seqid < max_ncpus);
	cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
}

void
cpu_event_fini_cpu(cpu_t *cp)
{
	ASSERT(cp->cpu_seqid < max_ncpus);
	cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
}

static void
cpu_idle_insert_callback(cpu_idle_cb_impl_t *cip)
{
	int unlock = 0, unpause = 0;
	int i, cnt_new = 0, cnt_old = 0;
	char *buf_new = NULL, *buf_old = NULL;

	ASSERT(MUTEX_HELD(&cpu_idle_cb_lock));

	/*
	 * Expand the array if it's full.
	 * Memory must be allocated outside of the pause_cpus()/start_cpus()
	 * scope because kmem_zalloc() can't be called with the KM_SLEEP flag
	 * within that scope.
	 */
	if (cpu_idle_cb_curr == cpu_idle_cb_max) {
		cnt_new = cpu_idle_cb_max + CPU_IDLE_ARRAY_CAPACITY_INC;
		buf_new = (char *)kmem_zalloc(cnt_new *
		    sizeof (cpu_idle_cb_item_t), KM_SLEEP);
	}

	/* Try to acquire cpu_lock if not held yet. */
	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		unlock = 1;
	}
	/*
	 * Pause all other CPUs (and let them run the pause thread).
	 * It's guaranteed that no other threads will access cpu_idle_cb_array
	 * after pause_cpus().
	 */
	if (!cpus_paused()) {
		pause_cpus(NULL);
		unpause = 1;
	}

	/* Copy content to the new buffer if needed. */
	if (buf_new != NULL) {
		buf_old = (char *)cpu_idle_cb_array;
		cnt_old = cpu_idle_cb_max;
		if (buf_old != NULL) {
			ASSERT(cnt_old != 0);
			bcopy(cpu_idle_cb_array, buf_new,
			    sizeof (cpu_idle_cb_item_t) * cnt_old);
		}
		cpu_idle_cb_array = (cpu_idle_cb_item_t *)buf_new;
		cpu_idle_cb_max = cnt_new;
	}

	/* Insert into array according to priority. */
	ASSERT(cpu_idle_cb_curr < cpu_idle_cb_max);
	for (i = cpu_idle_cb_curr; i > 0; i--) {
		if (cpu_idle_cb_array[i - 1].impl->priority >= cip->priority) {
			break;
		}
		cpu_idle_cb_array[i] = cpu_idle_cb_array[i - 1];
	}
	cpu_idle_cb_array[i].arg = cip->argument;
	cpu_idle_cb_array[i].enter = cip->callback->idle_enter;
	cpu_idle_cb_array[i].exit = cip->callback->idle_exit;
	cpu_idle_cb_array[i].impl = cip;
	cpu_idle_cb_curr++;

	/* Resume other CPUs from paused state if needed. */
	if (unpause) {
		start_cpus();
	}
	if (unlock) {
		mutex_exit(&cpu_lock);
	}

	/* Free old resource if needed. */
	if (buf_old != NULL) {
		ASSERT(cnt_old != 0);
		kmem_free(buf_old, cnt_old * sizeof (cpu_idle_cb_item_t));
	}
}

static void
cpu_idle_remove_callback(cpu_idle_cb_impl_t *cip)
{
	int i, found = 0;
	int unlock = 0, unpause = 0;
	cpu_idle_cb_state_t *sp;

	ASSERT(MUTEX_HELD(&cpu_idle_cb_lock));

	/* Try to acquire cpu_lock if not held yet. */
	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		unlock = 1;
	}
	/*
	 * Pause all other CPUs.
	 * It's guaranteed that no other threads will access cpu_idle_cb_array
	 * after pause_cpus().
	 */
	if (!cpus_paused()) {
		pause_cpus(NULL);
		unpause = 1;
	}

	/* Remove cip from array. */
	for (i = 0; i < cpu_idle_cb_curr; i++) {
		if (found == 0) {
			if (cpu_idle_cb_array[i].impl == cip) {
				found = 1;
			}
		} else {
			cpu_idle_cb_array[i - 1] = cpu_idle_cb_array[i];
		}
	}
	ASSERT(found != 0);
	cpu_idle_cb_curr--;

	/*
	 * Reset the property ready flag for all CPUs if no registered
	 * callback is left, because cpu_idle_enter/exit will stop updating
	 * properties when there's no callback registered.
	 */
	if (cpu_idle_cb_curr == 0) {
		for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) {
			sp->v.ready = B_FALSE;
		}
	}

	/* Resume other CPUs from paused state if needed. */
	if (unpause) {
		start_cpus();
	}
	if (unlock) {
		mutex_exit(&cpu_lock);
	}
}

int
cpu_idle_register_callback(uint_t prio, cpu_idle_callback_t *cbp,
    void *arg, cpu_idle_callback_handle_t *hdlp)
{
	cpu_idle_cb_state_t *sp;
	cpu_idle_cb_impl_t *cip = NULL;

	/* First validate parameters. */
	ASSERT(!CPU_ON_INTR(CPU));
	ASSERT(CPU->cpu_seqid < max_ncpus);
	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
	if (sp->v.index != 0) {
		cmn_err(CE_NOTE,
		    "!cpu_event: register_callback called from callback.");
		return (EBUSY);
	} else if (cbp == NULL || hdlp == NULL) {
		cmn_err(CE_NOTE,
		    "!cpu_event: NULL parameters in register_callback.");
		return (EINVAL);
	} else if (prio < CPU_IDLE_CB_PRIO_LOW_BASE ||
	    prio >= CPU_IDLE_CB_PRIO_RESV_BASE) {
		cmn_err(CE_NOTE,
		    "!cpu_event: priority 0x%x out of range.", prio);
		return (EINVAL);
	} else if (cbp->version != CPU_IDLE_CALLBACK_VERS) {
		cmn_err(CE_NOTE,
		    "!cpu_event: callback version %d is not supported.",
		    cbp->version);
		return (EINVAL);
	}

	mutex_enter(&cpu_idle_cb_lock);
	/* Check whether callback with priority exists if not dynamic. */
	if (prio != CPU_IDLE_CB_PRIO_DYNAMIC) {
		for (cip = cpu_idle_cb_busy; cip != NULL;
		    cip = cip->next) {
			if (cip->priority == prio) {
				mutex_exit(&cpu_idle_cb_lock);
				cmn_err(CE_NOTE, "!cpu_event: callback with "
				    "priority 0x%x already exists.", prio);
				return (EEXIST);
			}
		}
	}

	cip = kmem_zalloc(sizeof (*cip), KM_SLEEP);
	cip->callback = cbp;
	cip->argument = arg;
	cip->priority = prio;
	cip->next = cpu_idle_cb_busy;
	cpu_idle_cb_busy = cip;
	cpu_idle_insert_callback(cip);
	mutex_exit(&cpu_idle_cb_lock);

	*hdlp = (cpu_idle_callback_handle_t)cip;

	return (0);
}

int
cpu_idle_unregister_callback(cpu_idle_callback_handle_t hdl)
{
	int rc = ENODEV;
	cpu_idle_cb_state_t *sp;
	cpu_idle_cb_impl_t *ip, **ipp;

	ASSERT(!CPU_ON_INTR(CPU));
	ASSERT(CPU->cpu_seqid < max_ncpus);
	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
	if (sp->v.index != 0) {
		cmn_err(CE_NOTE,
		    "!cpu_event: unregister_callback called from callback.");
		return (EBUSY);
	} else if (hdl == NULL) {
		cmn_err(CE_NOTE,
		    "!cpu_event: hdl is NULL in unregister_callback.");
		return (EINVAL);
	}

	ip = (cpu_idle_cb_impl_t *)hdl;
	mutex_enter(&cpu_idle_cb_lock);
	for (ipp = &cpu_idle_cb_busy; *ipp != NULL; ipp = &(*ipp)->next) {
		if (*ipp == ip) {
			*ipp = ip->next;
			cpu_idle_remove_callback(ip);
			rc = 0;
			break;
		}
	}
	mutex_exit(&cpu_idle_cb_lock);

	if (rc == 0) {
		kmem_free(ip, sizeof (*ip));
	} else {
		cmn_err(CE_NOTE,
		    "!cpu_event: callback handle %p not found.", (void *)hdl);
	}

	return (rc);
}

static int
cpu_idle_enter_state(cpu_idle_cb_state_t *sp, intptr_t state)
{
	sp->v.idle_state->cipv_intptr = state;
	sp->v.enter_ts->cipv_hrtime = gethrtime_unscaled();
	sp->v.last_busy->cipv_hrtime = sp->v.enter_ts->cipv_hrtime -
	    sp->v.exit_ts->cipv_hrtime;
	sp->v.total_busy->cipv_hrtime += sp->v.last_busy->cipv_hrtime;
	if (sp->v.ready == B_FALSE) {
		sp->v.ready = B_TRUE;
		return (0);
	}

	return (1);
}

static void
cpu_idle_exit_state(cpu_idle_cb_state_t *sp)
{
	sp->v.idle_state->cipv_intptr = CPU_IDLE_STATE_NORMAL;
	sp->v.exit_ts->cipv_hrtime = gethrtime_unscaled();
	sp->v.last_idle->cipv_hrtime = sp->v.exit_ts->cipv_hrtime -
	    sp->v.enter_ts->cipv_hrtime;
	sp->v.total_idle->cipv_hrtime += sp->v.last_idle->cipv_hrtime;
}
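
/*
 * Illustrative sketch of the accounting done by the two helpers above
 * (hypothetical numbers, for clarity only): if a CPU last woke up at
 * exit_ts = 1000, enters idle again at enter_ts = 1600 and wakes up at
 * exit_ts = 1900 (unscaled hrtime units), then cpu_idle_enter_state()
 * records last_busy = 1600 - 1000 = 600 and adds it to total_busy, and
 * cpu_idle_exit_state() records last_idle = 1900 - 1600 = 300 and adds it
 * to total_idle.  The very first call only marks the per-CPU state ready
 * and returns 0, so cpu_idle_enter() skips the callbacks until valid
 * timestamps exist.
 */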

/*ARGSUSED*/
int
cpu_idle_enter(int state, int flag,
    cpu_idle_check_wakeup_t check_func, void *check_arg)
{
	int i;
	cpu_idle_cb_item_t *cip;
	cpu_idle_cb_state_t *sp;
	cpu_idle_callback_context_t ctx;
#if defined(__x86)
	ulong_t iflags;
#endif

	ctx = CPU_IDLE_GET_CTX(CPU);
	ASSERT(CPU->cpu_seqid < max_ncpus);
	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
	ASSERT(sp->v.index == 0);

	/*
	 * On x86, cpu_idle_enter can be called from the idle thread with
	 * interrupts either enabled or disabled, so we need to make sure
	 * interrupts are disabled here.
	 * On SPARC, cpu_idle_enter will be called from the idle thread with
	 * interrupts disabled, so no special handling is necessary.
	 */
#if defined(__x86)
	iflags = intr_clear();
#endif

	/* Skip calling callbacks if state is not ready for current CPU. */
	if (cpu_idle_enter_state(sp, state) == 0) {
#if defined(__x86)
		intr_restore(iflags);
#endif
		return (0);
	}

	for (i = 0, cip = cpu_idle_cb_array; i < cpu_idle_cb_curr; i++, cip++) {
		/*
		 * Increase index so the corresponding idle_exit callback
		 * will be invoked should an interrupt happen during the
		 * idle_enter callback.
		 */
		sp->v.index++;

		/* Call idle_enter callback function if it's not NULL. */
		if (cip->enter != NULL) {
			cip->enter(cip->arg, ctx, check_func, check_arg);

			/*
			 * cpu_idle_enter runs with interrupts
			 * disabled, so the idle_enter callbacks will
			 * also be called with interrupts disabled.
			 * It is permissible for a callback to
			 * re-enable interrupts, as long as it can
			 * also handle the condition of an interrupt
			 * occurring.
			 *
			 * However, if an interrupt occurs and we
			 * return here without dealing with it, we
			 * return to the cpu_idle_enter() caller
			 * with EBUSY, and the caller will not
			 * enter the idle state.
			 *
			 * We detect the interrupt by checking the
			 * index value of the state pointer.  If it
			 * is not the index we incremented above,
			 * then it was cleared while processing
			 * the interrupt.
			 *
			 * Also note that at this point in the code
			 * the normal index value will be one greater
			 * than the loop variable 'i', as it hasn't
			 * yet been incremented.
			 */
			if (sp->v.index != i + 1) {
#if defined(__x86)
				intr_restore(iflags);
#endif
				return (EBUSY);
			}
		}
	}
#if defined(__x86)
	intr_restore(iflags);
#endif

	return (0);
}

void
cpu_idle_exit(int flag)
{
	int i;
	cpu_idle_cb_item_t *cip;
	cpu_idle_cb_state_t *sp;
	cpu_idle_callback_context_t ctx;
#if defined(__x86)
	ulong_t iflags;
#endif

	ASSERT(CPU->cpu_seqid < max_ncpus);
	sp = &cpu_idle_cb_state[CPU->cpu_seqid];

#if defined(__sparc)
	/*
	 * On SPARC, cpu_idle_exit will only be called from the idle thread
	 * with interrupts disabled.
	 */
	if (sp->v.index != 0) {
		ctx = CPU_IDLE_GET_CTX(CPU);
		cpu_idle_exit_state(sp);
		for (i = sp->v.index - 1; i >= 0; i--) {
			cip = &cpu_idle_cb_array[i];
			if (cip->exit != NULL) {
				cip->exit(cip->arg, ctx, flag);
			}
		}
		sp->v.index = 0;
	}
#elif defined(__x86)
	/*
	 * On x86, cpu_idle_exit will be called from the idle thread or an
	 * interrupt handler.  When called from an interrupt handler,
	 * interrupts will be disabled.  When called from the idle thread,
	 * interrupts may be disabled or enabled.
	 */

	/* Called from an interrupt, interrupts are already disabled. */
	if (flag & CPU_IDLE_CB_FLAG_INTR) {
		/*
		 * Return if cpu_idle_exit has already been called or
		 * there is no registered callback.
		 */
		if (sp->v.index == 0) {
			return;
		}
		ctx = CPU_IDLE_GET_CTX(CPU);
		cpu_idle_exit_state(sp);
		for (i = sp->v.index - 1; i >= 0; i--) {
			cip = &cpu_idle_cb_array[i];
			if (cip->exit != NULL) {
				cip->exit(cip->arg, ctx, flag);
			}
		}
		sp->v.index = 0;

	/* Called from the idle thread, need to disable interrupts. */
	} else {
		iflags = intr_clear();
		if (sp->v.index != 0) {
			ctx = CPU_IDLE_GET_CTX(CPU);
			cpu_idle_exit_state(sp);
			for (i = sp->v.index - 1; i >= 0; i--) {
				cip = &cpu_idle_cb_array[i];
				if (cip->exit != NULL) {
					cip->exit(cip->arg, ctx, flag);
				}
			}
			sp->v.index = 0;
		}
		intr_restore(iflags);
	}
#endif
}

cpu_idle_callback_context_t
cpu_idle_get_context(void)
{
	return (CPU_IDLE_GET_CTX(CPU));
}

/*
 * Allocate property structures in groups of CPU_IDLE_VALUE_GROUP_SIZE to
 * improve cache efficiency.  To simplify the implementation, memory
 * allocated for property structures is never freed.
 */
static void
cpu_idle_prop_allocate_impl(void)
{
	int i;
	size_t sz;
	intptr_t buf;
	cpu_idle_prop_impl_t *prop;
	cpu_idle_prop_value_t *valp;

	ASSERT(!CPU_ON_INTR(CPU));
	prop = kmem_zalloc(sizeof (*prop) * CPU_IDLE_VALUE_GROUP_SIZE,
	    KM_SLEEP);
	sz = sizeof (*valp) * CPU_IDLE_VALUE_GROUP_SIZE * max_ncpus;
	sz += CPU_CACHE_COHERENCE_SIZE;
	buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP);
	valp = (cpu_idle_prop_value_t *)P2ROUNDUP(buf,
	    CPU_CACHE_COHERENCE_SIZE);

	for (i = 0; i < CPU_IDLE_VALUE_GROUP_SIZE; i++, prop++, valp++) {
		prop->value = valp;
		prop->next = cpu_idle_prop_free;
		cpu_idle_prop_free = prop;
	}
}

int
cpu_idle_prop_create_property(const char *name, cpu_idle_prop_type_t type,
    cpu_idle_prop_update_t update, void *arg, cpu_idle_prop_handle_t *hdlp)
{
	int rc = EEXIST;
	cpu_idle_prop_impl_t *prop;

	ASSERT(!CPU_ON_INTR(CPU));
	if (name == NULL || hdlp == NULL) {
		cmn_err(CE_WARN,
		    "!cpu_event: NULL parameters in create_property.");
		return (EINVAL);
	}

	mutex_enter(&cpu_idle_prop_lock);
	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
		if (strcmp(prop->name, name) == 0) {
			cmn_err(CE_NOTE,
			    "!cpu_event: property %s already exists.", name);
			break;
		}
	}
	if (prop == NULL) {
		if (cpu_idle_prop_free == NULL) {
			cpu_idle_prop_allocate_impl();
		}
		ASSERT(cpu_idle_prop_free != NULL);
		prop = cpu_idle_prop_free;
		cpu_idle_prop_free = prop->next;
		prop->next = cpu_idle_prop_busy;
		cpu_idle_prop_busy = prop;

		ASSERT(prop->value != NULL);
		prop->name = strdup(name);
		prop->type = type;
		prop->update = update;
		prop->private = arg;
		prop->refcnt = 1;
		*hdlp = prop;
		rc = 0;
	}
	mutex_exit(&cpu_idle_prop_lock);

	return (rc);
}
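
/*
 * Illustrative sketch (hypothetical names, not part of this file): a kernel
 * module could publish its own property and keep its value current through
 * an update callback, e.g.:
 *
 *	static cpu_idle_prop_handle_t my_prop_hdl;
 *
 *	static int
 *	my_prop_update(void *arg, uint64_t seqnum, cpu_idle_prop_value_t *valp)
 *	{
 *		valp->cipv_uint64 = my_collect_stat();	(hypothetical helper)
 *		return (0);
 *	}
 *
 *	(void) cpu_idle_prop_create_property("my-module-stat",
 *	    CPU_IDLE_PROP_TYPE_UINT64, my_prop_update, NULL, &my_prop_hdl);
 *
 * The update callback is invoked from cpu_idle_prop_get_value() with the
 * current CPU's idle enter timestamp passed as the sequence number.
 */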

int
cpu_idle_prop_destroy_property(cpu_idle_prop_handle_t hdl)
{
	int rc = ENODEV;
	cpu_idle_prop_impl_t *prop, **propp;
	cpu_idle_prop_value_t *valp;

	ASSERT(!CPU_ON_INTR(CPU));
	if (hdl == NULL) {
		cmn_err(CE_WARN,
		    "!cpu_event: hdl is NULL in destroy_property.");
		return (EINVAL);
	}

	prop = (cpu_idle_prop_impl_t *)hdl;
	mutex_enter(&cpu_idle_prop_lock);
	for (propp = &cpu_idle_prop_busy; *propp != NULL;
	    propp = &(*propp)->next) {
		if (*propp == prop) {
			ASSERT(prop->refcnt > 0);
			if (atomic_cas_32(&prop->refcnt, 1, 0) == 1) {
				*propp = prop->next;
				strfree(prop->name);
				valp = prop->value;
				bzero(prop, sizeof (*prop));
				prop->value = valp;
				prop->next = cpu_idle_prop_free;
				cpu_idle_prop_free = prop;
				rc = 0;
			} else {
				rc = EBUSY;
			}
			break;
		}
	}
	mutex_exit(&cpu_idle_prop_lock);

	return (rc);
}

int
cpu_idle_prop_create_handle(const char *name, cpu_idle_prop_handle_t *hdlp)
{
	int rc = ENODEV;
	cpu_idle_prop_impl_t *prop;

	ASSERT(!CPU_ON_INTR(CPU));
	if (name == NULL || hdlp == NULL) {
		cmn_err(CE_WARN,
		    "!cpu_event: NULL parameters in create_handle.");
		return (EINVAL);
	}

	mutex_enter(&cpu_idle_prop_lock);
	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
		if (strcmp(prop->name, name) == 0) {
			/* Hold one refcount on object. */
			ASSERT(prop->refcnt > 0);
			atomic_inc_32(&prop->refcnt);
			*hdlp = (cpu_idle_prop_handle_t)prop;
			rc = 0;
			break;
		}
	}
	mutex_exit(&cpu_idle_prop_lock);

	return (rc);
}

int
cpu_idle_prop_destroy_handle(cpu_idle_prop_handle_t hdl)
{
	int rc = ENODEV;
	cpu_idle_prop_impl_t *prop;

	ASSERT(!CPU_ON_INTR(CPU));
	if (hdl == NULL) {
		cmn_err(CE_WARN,
		    "!cpu_event: hdl is NULL in destroy_handle.");
		return (EINVAL);
	}

	mutex_enter(&cpu_idle_prop_lock);
	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
		if (prop == hdl) {
			/* Release refcnt held in create_handle. */
			ASSERT(prop->refcnt > 1);
			atomic_dec_32(&prop->refcnt);
			rc = 0;
			break;
		}
	}
	mutex_exit(&cpu_idle_prop_lock);

	return (rc);
}

cpu_idle_prop_type_t
cpu_idle_prop_get_type(cpu_idle_prop_handle_t hdl)
{
	ASSERT(hdl != NULL);
	return (((cpu_idle_prop_impl_t *)hdl)->type);
}

const char *
cpu_idle_prop_get_name(cpu_idle_prop_handle_t hdl)
{
	ASSERT(hdl != NULL);
	return (((cpu_idle_prop_impl_t *)hdl)->name);
}

int
cpu_idle_prop_get_value(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t *valp)
{
	int idx, rc = 0;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	if (hdl == NULL || valp == NULL) {
		cmn_err(CE_NOTE, "!cpu_event: NULL parameters in prop_get.");
		return (EINVAL);
	}
	idx = CPU_IDLE_CTX2IDX(ctx);
	if (prop->update != NULL) {
		cpu_idle_cb_state_t *sp;

		ASSERT(CPU->cpu_seqid < max_ncpus);
		sp = &cpu_idle_cb_state[CPU->cpu_seqid];
		/* CPU's idle enter timestamp as sequence number. */
		rc = prop->update(prop->private,
		    (uint64_t)sp->v.enter_ts->cipv_hrtime, &prop->value[idx]);
	}
	if (rc == 0) {
		*valp = prop->value[idx];
	}

	return (rc);
}

uint32_t
cpu_idle_prop_get_uint32(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx)
{
	int idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	idx = CPU_IDLE_CTX2IDX(ctx);
	return (prop->value[idx].cipv_uint32);
}

uint64_t
cpu_idle_prop_get_uint64(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx)
{
	int idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	idx = CPU_IDLE_CTX2IDX(ctx);
	return (prop->value[idx].cipv_uint64);
}

intptr_t
cpu_idle_prop_get_intptr(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx)
{
	int idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	idx = CPU_IDLE_CTX2IDX(ctx);
	return (prop->value[idx].cipv_intptr);
}

hrtime_t
cpu_idle_prop_get_hrtime(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx)
{
	int idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	idx = CPU_IDLE_CTX2IDX(ctx);
	return (prop->value[idx].cipv_hrtime);
}

void
cpu_idle_prop_set_value(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t val)
{
	int idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	idx = CPU_IDLE_CTX2IDX(ctx);
	prop->value[idx] = val;
}

void
cpu_idle_prop_set_all(cpu_idle_prop_handle_t hdl, cpu_idle_prop_value_t val)
{
	int i, idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	for (i = 0; i < max_ncpus; i++) {
		idx = CPU_IDLE_CTX2IDX(i);
		prop->value[idx] = val;
	}
}

/*ARGSUSED*/
static int
cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
    cpu_idle_prop_value_t *valp)
{
	int i;
	uint64_t val;

	for (val = 0, i = 0; i < PIL_MAX; i++) {
		val += CPU->cpu_stats.sys.intr[i];
	}
	valp->cipv_uint64 = val;

	return (0);
}

uint_t
cpu_idle_get_cpu_state(cpu_t *cp)
{
	ASSERT(cp != NULL && cp->cpu_seqid < max_ncpus);
	return ((uint_t)cpu_idle_prop_get_uint32(
	    cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle,
	    CPU_IDLE_GET_CTX(cp)));
}
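
/*
 * Illustrative usage sketch (hypothetical client, not part of this file):
 * a module that wants to observe idle transitions and read the last idle
 * time of the current CPU could do something like:
 *
 *	static cpu_idle_callback_handle_t my_cb_hdl;
 *	static cpu_idle_prop_handle_t my_last_idle_hdl;
 *
 *	static void
 *	my_idle_enter(void *arg, cpu_idle_callback_context_t ctx,
 *	    cpu_idle_check_wakeup_t check_func, void *check_arg)
 *	{
 *		(per-CPU bookkeeping here; runs with interrupts disabled)
 *	}
 *
 *	static void
 *	my_idle_exit(void *arg, cpu_idle_callback_context_t ctx, int flag)
 *	{
 *		hrtime_t t = cpu_idle_prop_get_hrtime(my_last_idle_hdl, ctx);
 *		(consume t)
 *	}
 *
 *	static cpu_idle_callback_t my_cb = {
 *		CPU_IDLE_CALLBACK_VERS,
 *		my_idle_enter,
 *		my_idle_exit,
 *	};
 *
 *	(void) cpu_idle_prop_create_handle(CPU_IDLE_PROP_LAST_IDLE_TIME,
 *	    &my_last_idle_hdl);
 *	(void) cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DYNAMIC, &my_cb,
 *	    NULL, &my_cb_hdl);
 *
 * Teardown would use cpu_idle_unregister_callback(my_cb_hdl) and
 * cpu_idle_prop_destroy_handle(my_last_idle_hdl).
 */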