/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * CPU Device driver. The driver is not DDI-compliant.
 *
 * The driver supports the following features:
 *	- Power management.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/modctl.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/stat.h>
#include <sys/debug.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sdt.h>

#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <sys/cpudrv.h>
#include <sys/cpudrv_plat.h>
#include <sys/msacct.h>

/*
 * CPU power management
 *
 * The supported power-saving model is to slow down the CPU (on SPARC by
 * dividing the CPU clock and on x86 by dropping down a P-state).
 * Periodically we determine the amount of time the CPU is running the
 * idle thread and threads in user mode during the last quantum.  If the
 * idle thread was running less than its low water mark for the current
 * speed for a number of consecutive sampling periods, or the number of
 * threads running in user mode is above its high water mark, we arrange
 * to go to the higher speed.  If the idle thread was running more than
 * its high water mark without dropping below the mark a number of
 * consecutive times, and the number of threads running in user mode is
 * below its low water mark, we arrange to go to the next lower speed.
 * While going down, we go through all the speeds.  While going up, we go
 * to the maximum speed to minimize impact on the user, but have
 * provisions in the driver to go to other speeds.
 *
 * The driver does not have knowledge of a particular implementation of
 * this scheme and will work with all CPUs supporting this model.  On
 * SPARC, the driver determines the supported speeds by looking at the
 * 'clock-divisors' property created by OBP.  On x86, the driver
 * retrieves the supported speeds from ACPI.
 */
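
/*
 * Illustrative sketch only (the divisor values are hypothetical): if OBP
 * exported clock-divisors of 1, 2 and 8, the driver would build a
 * three-speed ladder of full, 1/2 and 1/8 speed.  Under the policy
 * described above, an idle CPU steps down the ladder one speed at a
 * time, while a loaded CPU jumps straight back to full speed:
 *
 *	full -> 1/2 -> 1/8		(slowing down, stepwise)
 *	1/8 or 1/2 -> full		(speeding up, one jump)
 */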

/*
 * Configuration function prototypes and data structures
 */
static int cpudrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int cpudrv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
static int cpudrv_power(dev_info_t *dip, int comp, int level);

struct dev_ops cpudrv_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt */
	nodev,				/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	cpudrv_attach,			/* attach */
	cpudrv_detach,			/* detach */
	nodev,				/* reset */
	(struct cb_ops *)NULL,		/* cb_ops */
	(struct bus_ops *)NULL,		/* bus_ops */
	cpudrv_power			/* power */
};

static struct modldrv modldrv = {
	&mod_driverops,			/* modops */
	"CPU Driver %I%",		/* linkinfo */
	&cpudrv_ops,			/* dev_ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,			/* rev */
	&modldrv,			/* linkage */
	NULL
};

/*
 * Function prototypes
 */
static int cpudrv_pm_init(cpudrv_devstate_t *cpudsp);
static void cpudrv_pm_free(cpudrv_devstate_t *cpudsp);
static int cpudrv_pm_comp_create(cpudrv_devstate_t *cpudsp);
static void cpudrv_pm_monitor_disp(void *arg);
static void cpudrv_pm_monitor(void *arg);

/*
 * Driver global variables
 */
uint_t cpudrv_debug = 0;
void *cpudrv_state;
static uint_t cpudrv_pm_idle_hwm = CPUDRV_PM_IDLE_HWM;
static uint_t cpudrv_pm_idle_lwm = CPUDRV_PM_IDLE_LWM;
static uint_t cpudrv_pm_idle_buf_zone = CPUDRV_PM_IDLE_BUF_ZONE;
static uint_t cpudrv_pm_idle_bhwm_cnt_max = CPUDRV_PM_IDLE_BHWM_CNT_MAX;
static uint_t cpudrv_pm_idle_blwm_cnt_max = CPUDRV_PM_IDLE_BLWM_CNT_MAX;
static uint_t cpudrv_pm_user_hwm = CPUDRV_PM_USER_HWM;

/*
 * cpudrv_direct_pm allows user applications to directly control the
 * power state transitions (direct pm) without following the normal
 * direct pm protocol.  This is needed because the normal protocol
 * requires that a device only be lowered when it is idle, and be
 * brought up when it requests to do so by calling pm_raise_power().
 * Ignoring this protocol is harmless for the CPU (other than speed).
 * Moreover, it might be the case that the CPU is never idle or wants
 * to be at a higher speed because of the additional CPU cycles
 * required to run the user application.
 *
 * The driver will still report idle/busy status to the framework.
 * Although the framework will ignore this information for direct pm
 * devices and not try to bring them down when idle, user applications
 * can still use this information if they want.
 *
 * In the future, provide an ioctl to control setting of this mode.  In
 * that case, this variable should move to the state structure and
 * be protected by the lock in the state structure.
 */
int cpudrv_direct_pm = 0;
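
/*
 * The watermark variables above are module globals rather than compile
 * time constants, so, as with any kernel module global, a site-specific
 * value could be patched in via /etc/system.  Illustrative sketch only;
 * the values shown are hypothetical and the defaults normally need no
 * tuning:
 *
 *	set cpudrv:cpudrv_pm_idle_hwm = 95
 *	set cpudrv:cpudrv_direct_pm = 1
 */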

/*
 * Arranges for the handler function to be called at the interval
 * suitable for the current speed.
 */
#define	CPUDRV_PM_MONITOR_INIT(cpudsp) { \
	ASSERT(mutex_owned(&(cpudsp)->lock)); \
	(cpudsp)->cpudrv_pm.timeout_id = timeout(cpudrv_pm_monitor_disp, \
	    (cpudsp), (((cpudsp)->cpudrv_pm.cur_spd == NULL) ? \
	    CPUDRV_PM_QUANT_CNT_OTHR : \
	    (cpudsp)->cpudrv_pm.cur_spd->quant_cnt)); \
}

/*
 * Arranges for the handler function not to be called back.
 */
#define	CPUDRV_PM_MONITOR_FINI(cpudsp) { \
	timeout_id_t tmp_tid; \
	ASSERT(mutex_owned(&(cpudsp)->lock)); \
	ASSERT((cpudsp)->cpudrv_pm.timeout_id); \
	tmp_tid = (cpudsp)->cpudrv_pm.timeout_id; \
	(cpudsp)->cpudrv_pm.timeout_id = 0; \
	mutex_exit(&(cpudsp)->lock); \
	(void) untimeout(tmp_tid); \
	mutex_enter(&(cpudsp)->cpudrv_pm.timeout_lock); \
	while ((cpudsp)->cpudrv_pm.timeout_count != 0) \
		cv_wait(&(cpudsp)->cpudrv_pm.timeout_cv, \
		    &(cpudsp)->cpudrv_pm.timeout_lock); \
	mutex_exit(&(cpudsp)->cpudrv_pm.timeout_lock); \
	mutex_enter(&(cpudsp)->lock); \
}

int
_init(void)
{
	int error;

	DPRINTF(D_INIT, (" _init: function called\n"));
	if ((error = ddi_soft_state_init(&cpudrv_state,
	    sizeof (cpudrv_devstate_t), 0)) != 0) {
		return (error);
	}

	if ((error = mod_install(&modlinkage)) != 0) {
		ddi_soft_state_fini(&cpudrv_state);
	}

	/*
	 * Callbacks used by the PPM driver.
	 */
	CPUDRV_PM_SET_PPM_CALLBACKS();
	return (error);
}

int
_fini(void)
{
	int error;

	DPRINTF(D_FINI, (" _fini: function called\n"));
	if ((error = mod_remove(&modlinkage)) == 0) {
		ddi_soft_state_fini(&cpudrv_state);
	}

	return (error);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * Driver attach(9e) entry point.
 */
static int
cpudrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int instance;
	cpudrv_devstate_t *cpudsp;
	extern pri_t maxclsyspri;

	instance = ddi_get_instance(dip);

	switch (cmd) {
	case DDI_ATTACH:
		DPRINTF(D_ATTACH, ("cpudrv_attach: instance %d: "
		    "DDI_ATTACH called\n", instance));
		if (ddi_soft_state_zalloc(cpudrv_state, instance) !=
		    DDI_SUCCESS) {
			cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
			    "can't allocate state", instance);
			CPUDRV_PM_DISABLE();
			return (DDI_FAILURE);
		}
		if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) ==
		    NULL) {
			cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
			    "can't get state", instance);
			ddi_soft_state_free(cpudrv_state, instance);
			CPUDRV_PM_DISABLE();
			return (DDI_FAILURE);
		}
		cpudsp->dip = dip;

		/*
		 * Find CPU number for this dev_info node.
		 */
		if (!cpudrv_pm_get_cpu_id(dip, &(cpudsp->cpu_id))) {
			cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
			    "can't convert dip to cpu_id", instance);
			ddi_soft_state_free(cpudrv_state, instance);
			CPUDRV_PM_DISABLE();
			return (DDI_FAILURE);
		}
		if (cpudrv_pm_init(cpudsp) != DDI_SUCCESS) {
			ddi_soft_state_free(cpudrv_state, instance);
			CPUDRV_PM_DISABLE();
			return (DDI_FAILURE);
		}
		if (cpudrv_pm_comp_create(cpudsp) != DDI_SUCCESS) {
			/*
			 * Free the PM data before the soft state that
			 * contains it goes away.
			 */
			cpudrv_pm_free(cpudsp);
			ddi_soft_state_free(cpudrv_state, instance);
			CPUDRV_PM_DISABLE();
			return (DDI_FAILURE);
		}
		if (ddi_prop_update_string(DDI_DEV_T_NONE,
		    dip, "pm-class", "CPU") != DDI_PROP_SUCCESS) {
			cpudrv_pm_free(cpudsp);
			ddi_soft_state_free(cpudrv_state, instance);
			CPUDRV_PM_DISABLE();
			return (DDI_FAILURE);
		}

		/*
		 * Taskq is used to dispatch the routine that monitors CPU
		 * activities.
		 */
		cpudsp->cpudrv_pm.tq = taskq_create_instance(
		    "cpudrv_pm_monitor",
		    ddi_get_instance(dip), CPUDRV_PM_TASKQ_THREADS,
		    (maxclsyspri - 1), CPUDRV_PM_TASKQ_MIN,
		    CPUDRV_PM_TASKQ_MAX, TASKQ_PREPOPULATE|TASKQ_CPR_SAFE);

		mutex_init(&cpudsp->lock, NULL, MUTEX_DRIVER, NULL);
		mutex_init(&cpudsp->cpudrv_pm.timeout_lock, NULL, MUTEX_DRIVER,
		    NULL);
		cv_init(&cpudsp->cpudrv_pm.timeout_cv, NULL, CV_DEFAULT, NULL);

		/*
		 * The driver needs to assume that the CPU is running at an
		 * unknown speed at DDI_ATTACH and switch it to the needed
		 * speed.  We assume that the initial needed speed is full
		 * speed.
		 */
		/*
		 * We need to take the lock because cpudrv_pm_monitor()
		 * will start running in parallel with attach().
		 */
		mutex_enter(&cpudsp->lock);
		cpudsp->cpudrv_pm.cur_spd = NULL;
		cpudsp->cpudrv_pm.targ_spd = cpudsp->cpudrv_pm.head_spd;
		cpudsp->cpudrv_pm.pm_started = B_FALSE;
		/*
		 * We don't call pm_raise_power() directly from attach
		 * because driver attach for a slave CPU node can happen
		 * before the CPU is even initialized.  We just start the
		 * monitoring system, which understands unknown speed and
		 * moves the CPU to targ_spd when it has been initialized.
		 */
		CPUDRV_PM_MONITOR_INIT(cpudsp);
		mutex_exit(&cpudsp->lock);

		CPUDRV_PM_INSTALL_TOPSPEED_CHANGE_HANDLER(cpudsp, dip);

		ddi_report_dev(dip);
		return (DDI_SUCCESS);

	case DDI_RESUME:
		DPRINTF(D_ATTACH, ("cpudrv_attach: instance %d: "
		    "DDI_RESUME called\n", instance));
		if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) ==
		    NULL) {
			cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
			    "can't get state", instance);
			return (DDI_FAILURE);
		}
		mutex_enter(&cpudsp->lock);
		/*
		 * The driver needs to assume that the CPU is running at an
		 * unknown speed at DDI_RESUME and switch it to the needed
		 * speed.  We assume that the needed speed is full speed.
		 */
		cpudsp->cpudrv_pm.cur_spd = NULL;
		cpudsp->cpudrv_pm.targ_spd = cpudsp->cpudrv_pm.head_spd;
		CPUDRV_PM_MONITOR_INIT(cpudsp);
		mutex_exit(&cpudsp->lock);
		CPUDRV_PM_REDEFINE_TOPSPEED(dip);
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * Driver detach(9e) entry point.
 */
static int
cpudrv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance;
	cpudrv_devstate_t *cpudsp;
	cpudrv_pm_t *cpupm;

	instance = ddi_get_instance(dip);

	switch (cmd) {
	case DDI_DETACH:
		DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: "
		    "DDI_DETACH called\n", instance));
		/*
		 * If the only thing supported by the driver is power
		 * management, we can in the future enhance the driver and
		 * the framework that loads it to unload the driver when
		 * the user has disabled CPU power management.
		 */
		return (DDI_FAILURE);

	case DDI_SUSPEND:
		DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: "
		    "DDI_SUSPEND called\n", instance));
		if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) ==
		    NULL) {
			cmn_err(CE_WARN, "cpudrv_detach: instance %d: "
			    "can't get state", instance);
			return (DDI_FAILURE);
		}
		/*
		 * During a checkpoint-resume sequence, the framework will
		 * stop interrupts to quiesce kernel activity.  This will
		 * leave our monitoring system ineffective.  Handle this
		 * by stopping our monitoring system and bringing the CPU
		 * to full speed.  In case we are in special direct pm
		 * mode, we leave the CPU at whatever speed it is.  This
		 * is harmless other than speed.
		 */
		mutex_enter(&cpudsp->lock);
		cpupm = &(cpudsp->cpudrv_pm);

		DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: DDI_SUSPEND - "
		    "cur_spd %d, head_spd %d\n", instance,
		    cpupm->cur_spd->pm_level, cpupm->head_spd->pm_level));

		CPUDRV_PM_MONITOR_FINI(cpudsp);

		if (!cpudrv_direct_pm && (cpupm->cur_spd != cpupm->head_spd)) {
			if (cpupm->pm_busycnt < 1) {
				if ((pm_busy_component(dip, CPUDRV_PM_COMP_NUM)
				    == DDI_SUCCESS)) {
					cpupm->pm_busycnt++;
				} else {
					CPUDRV_PM_MONITOR_INIT(cpudsp);
					mutex_exit(&cpudsp->lock);
					cmn_err(CE_WARN, "cpudrv_detach: "
					    "instance %d: can't busy CPU "
					    "component", instance);
					return (DDI_FAILURE);
				}
			}
			mutex_exit(&cpudsp->lock);
			if (pm_raise_power(dip, CPUDRV_PM_COMP_NUM,
			    cpupm->head_spd->pm_level) != DDI_SUCCESS) {
				mutex_enter(&cpudsp->lock);
				CPUDRV_PM_MONITOR_INIT(cpudsp);
				mutex_exit(&cpudsp->lock);
				cmn_err(CE_WARN, "cpudrv_detach: instance %d: "
				    "can't raise CPU power level", instance);
				return (DDI_FAILURE);
			} else {
				return (DDI_SUCCESS);
			}
		} else {
			mutex_exit(&cpudsp->lock);
			return (DDI_SUCCESS);
		}

	default:
		return (DDI_FAILURE);
	}
}

/*
 * Driver power(9e) entry point.
 *
 * The driver's notion of the current power level is set *only* in the
 * power(9e) entry point after the actual power change operation has
 * been successfully completed.
 */
/* ARGSUSED */
static int
cpudrv_power(dev_info_t *dip, int comp, int level)
{
	int instance;
	cpudrv_devstate_t *cpudsp;
	cpudrv_pm_t *cpupm;
	cpudrv_pm_spd_t *new_spd;
	boolean_t is_ready;
	int ret;

	instance = ddi_get_instance(dip);

	DPRINTF(D_POWER, ("cpudrv_power: instance %d: level %d\n",
	    instance, level));
	if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) == NULL) {
		cmn_err(CE_WARN, "cpudrv_power: instance %d: can't get state",
		    instance);
		return (DDI_FAILURE);
	}

	mutex_enter(&cpudsp->lock);
	cpupm = &(cpudsp->cpudrv_pm);

	/*
	 * In normal operation, we fail if we are busy and the request is
	 * to lower the power level.  We let this go through if the driver
	 * is in special direct pm mode.  On x86, we also let this through
	 * if the change is due to a request to throttle the max speed.
	 */
	if (!cpudrv_direct_pm && (cpupm->pm_busycnt >= 1) &&
	    !cpudrv_pm_is_throttle_thread(cpupm)) {
		if ((cpupm->cur_spd != NULL) &&
		    (level < cpupm->cur_spd->pm_level)) {
			mutex_exit(&cpudsp->lock);
			return (DDI_FAILURE);
		}
	}

	for (new_spd = cpupm->head_spd; new_spd; new_spd = new_spd->down_spd) {
		if (new_spd->pm_level == level)
			break;
	}
	if (!new_spd) {
		CPUDRV_PM_RESET_THROTTLE_THREAD(cpupm);
		mutex_exit(&cpudsp->lock);
		cmn_err(CE_WARN, "cpudrv_power: instance %d: "
		    "can't locate new CPU speed", instance);
		return (DDI_FAILURE);
	}
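
	/*
	 * A note on the search above: cpudrv_pm_comp_create() numbers the
	 * power levels num_spd down to 1 while walking from head_spd (full
	 * speed) to the slowest speed.  For example, on a hypothetical
	 * three-speed CPU, full speed is pm_level 3 and the slowest speed
	 * is pm_level 1, so walking the down_spd links visits every valid
	 * level exactly once.
	 */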

	/*
	 * We currently refuse to power manage if the CPU is not ready to
	 * take cross calls (cross calls fail silently if the CPU is not
	 * ready for them).
	 *
	 * Additionally, for x86 platforms we cannot power manage any one
	 * instance until all instances have been initialized.  That's
	 * because we don't know what the CPU domains look like until all
	 * instances have been initialized.
	 */
	is_ready = CPUDRV_PM_XCALL_IS_READY(cpudsp->cpu_id);
	if (!is_ready) {
		DPRINTF(D_POWER, ("cpudrv_power: instance %d: "
		    "CPU not ready for x-calls\n", instance));
	} else if (!(is_ready = cpudrv_pm_all_instances_ready())) {
		DPRINTF(D_POWER, ("cpudrv_power: instance %d: "
		    "waiting for all CPUs to be ready\n", instance));
	}
	if (!is_ready) {
		CPUDRV_PM_RESET_THROTTLE_THREAD(cpupm);
		mutex_exit(&cpudsp->lock);
		return (DDI_FAILURE);
	}

	/*
	 * Execute the CPU-specific routine on the requested CPU to change
	 * its speed to normal-speed/divisor.
	 */
	if ((ret = cpudrv_pm_change_speed(cpudsp, new_spd)) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "cpudrv_power: cpudrv_pm_change_speed() "
		    "return = %d", ret);
		mutex_exit(&cpudsp->lock);
		return (DDI_FAILURE);
	}

	/*
	 * DTrace probe point for CPU speed change transition.
	 */
	DTRACE_PROBE3(cpu__change__speed, cpudrv_devstate_t *, cpudsp,
	    cpudrv_pm_t *, cpupm, cpudrv_pm_spd_t *, new_spd);

	/*
	 * Reset idle threshold time for the new power level.
	 */
	if ((cpupm->cur_spd != NULL) && (level < cpupm->cur_spd->pm_level)) {
		if (pm_idle_component(dip, CPUDRV_PM_COMP_NUM) ==
		    DDI_SUCCESS) {
			if (cpupm->pm_busycnt >= 1)
				cpupm->pm_busycnt--;
		} else
			cmn_err(CE_WARN, "cpudrv_power: instance %d: can't "
			    "idle CPU component", ddi_get_instance(dip));
	}
	/*
	 * Reset various parameters because we are now running at a new
	 * speed.
	 */
	cpupm->lastquan_mstate[CMS_IDLE] = 0;
	cpupm->lastquan_mstate[CMS_SYSTEM] = 0;
	cpupm->lastquan_mstate[CMS_USER] = 0;
	cpupm->lastquan_lbolt = 0;
	cpupm->cur_spd = new_spd;
	CPUDRV_PM_RESET_THROTTLE_THREAD(cpupm);
	mutex_exit(&cpudsp->lock);

	return (DDI_SUCCESS);
}

/*
 * Initialize the field that will be used for reporting
 * the supported_frequencies_Hz cpu_info kstat.
 */
static void
set_supp_freqs(cpu_t *cp, cpudrv_pm_t *cpupm)
{
	char *supp_freqs;
	char *sfptr;
	uint64_t *speeds;
	cpudrv_pm_spd_t *spd;
	int i;
#define	UINT64_MAX_STRING (sizeof ("18446744073709551615"))

	speeds = kmem_zalloc(cpupm->num_spd * sizeof (uint64_t), KM_SLEEP);
	for (i = cpupm->num_spd - 1, spd = cpupm->head_spd; spd;
	    i--, spd = spd->down_spd) {
		speeds[i] =
		    CPUDRV_PM_SPEED_HZ(cp->cpu_type_info.pi_clock, spd->speed);
	}

	supp_freqs = kmem_zalloc((UINT64_MAX_STRING * cpupm->num_spd),
	    KM_SLEEP);
	sfptr = supp_freqs;
	for (i = 0; i < cpupm->num_spd; i++) {
		if (i == cpupm->num_spd - 1) {
			(void) sprintf(sfptr, "%"PRIu64, speeds[i]);
		} else {
			(void) sprintf(sfptr, "%"PRIu64":", speeds[i]);
			sfptr = supp_freqs + strlen(supp_freqs);
		}
	}
	cpu_set_supp_freqs(cp, supp_freqs);
	kmem_free(supp_freqs, (UINT64_MAX_STRING * cpupm->num_spd));
	kmem_free(speeds, cpupm->num_spd * sizeof (uint64_t));
}
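
/*
 * Illustration of the string built above (frequencies are hypothetical):
 * because speeds[] is filled from the tail while walking from head_spd
 * (the fastest speed) down, the result is a colon-separated list in
 * ascending frequency order.  A 1.2 GHz CPU with clock divisors 1 and 2
 * might yield:
 *
 *	"600000000:1200000000"
 */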

/*
 * Initialize power management data.
 */
static int
cpudrv_pm_init(cpudrv_devstate_t *cpudsp)
{
	cpudrv_pm_t *cpupm = &(cpudsp->cpudrv_pm);
	cpudrv_pm_spd_t *cur_spd;
	cpudrv_pm_spd_t *prev_spd = NULL;
	int *speeds;
	uint_t nspeeds;
	int idle_cnt_percent;
	int user_cnt_percent;
	int i;

	if (!cpudrv_pm_init_module(cpudsp))
		return (DDI_FAILURE);

	CPUDRV_PM_GET_SPEEDS(cpudsp, speeds, nspeeds);
	if (nspeeds < 2) {
		/* Need at least two speeds to power manage */
		CPUDRV_PM_FREE_SPEEDS(speeds, nspeeds);
		cpudrv_pm_free_module(cpudsp);
		return (DDI_FAILURE);
	}
	cpupm->num_spd = nspeeds;

	/*
	 * Calculate the watermarks and other parameters based on the
	 * supplied speeds.
	 *
	 * One of the basic assumptions is that for X amount of CPU work,
	 * if the CPU is slowed down by a factor of N, the time it takes
	 * to do the same work will be N * X.
	 *
	 * The driver declares that a CPU is idle and ready to be slowed
	 * down if the amount of idle time is more than the current
	 * speed's idle_hwm, without dropping below idle_hwm for a number
	 * of consecutive sampling intervals, and the number of threads
	 * running in user mode is below user_lwm.  We want to set the
	 * current user_lwm such that, if we just switched to the next
	 * slower speed with no change in real workload, the amount of
	 * user threads at the slower speed would fall below that speed's
	 * user_hwm.  If we didn't do that, we would just come back to
	 * the higher speed as soon as we went down, even with no change
	 * in workload.
	 * The user_hwm is a fixed percentage and is not calculated
	 * dynamically.
	 *
	 * We bring the CPU up if the idle time at the current speed is
	 * less than the current speed's idle_lwm for a number of
	 * consecutive sampling intervals, or user threads are above the
	 * user_hwm for the current speed.
	 */
	for (i = 0; i < nspeeds; i++) {
		cur_spd = kmem_zalloc(sizeof (cpudrv_pm_spd_t), KM_SLEEP);
		cur_spd->speed = speeds[i];
		if (i == 0) {	/* normal speed */
			cpupm->head_spd = cur_spd;
			cur_spd->quant_cnt = CPUDRV_PM_QUANT_CNT_NORMAL;
			cur_spd->idle_hwm =
			    (cpudrv_pm_idle_hwm * cur_spd->quant_cnt) / 100;
			/* can't speed up any more */
			cur_spd->idle_lwm = 0;
			cur_spd->user_hwm = UINT_MAX;
		} else {
			cur_spd->quant_cnt = CPUDRV_PM_QUANT_CNT_OTHR;
			ASSERT(prev_spd != NULL);
			prev_spd->down_spd = cur_spd;
			cur_spd->up_spd = cpupm->head_spd;

			/*
			 * Let's assume the CPU is considered idle at full
			 * speed when it is spending I% of time running the
			 * idle thread.  At full speed, the CPU will be
			 * busy (100 - I)% of the time.  This percentage of
			 * busyness increases by a factor of N as the CPU
			 * slows down.  A CPU that is idle I% of the time
			 * at full speed is idle (100 - ((100 - I) * N))%
			 * of the time at 1/N speed.  The idle_lwm is a
			 * fixed percentage.  A large value of N may result
			 * in idle_hwm going below idle_lwm.  We need to
			 * make sure that there is at least a buffer zone
			 * of separation between the idle_lwm and idle_hwm
			 * values.
			 */
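			/*
			 * Worked example with hypothetical numbers: if
			 * cpudrv_pm_idle_hwm were 90 and this speed ran
			 * the CPU at half the clock (N = 2), a CPU that
			 * is busy 10% of the time at full speed is busy
			 * about 20% of the time at 1/2 speed, i.e. idle
			 * 100 - ((100 - 90) * 2) = 80% of the time.  The
			 * max() below then keeps idle_hwm at least
			 * cpudrv_pm_idle_lwm + cpudrv_pm_idle_buf_zone
			 * percent, so the two watermarks cannot cross.
			 */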
			idle_cnt_percent = CPUDRV_PM_IDLE_CNT_PERCENT(
			    cpudrv_pm_idle_hwm, speeds, i);
			idle_cnt_percent = max(idle_cnt_percent,
			    (cpudrv_pm_idle_lwm + cpudrv_pm_idle_buf_zone));
			cur_spd->idle_hwm =
			    (idle_cnt_percent * cur_spd->quant_cnt) / 100;
			cur_spd->idle_lwm =
			    (cpudrv_pm_idle_lwm * cur_spd->quant_cnt) / 100;

			/*
			 * The lwm for user threads is determined such
			 * that, if the CPU slows down, the workload at
			 * the new speed would still keep the CPU at or
			 * below the user_hwm of the new speed.  This is
			 * to prevent a quick jump back up to the higher
			 * speed.
			 */
			cur_spd->user_hwm = (cpudrv_pm_user_hwm *
			    cur_spd->quant_cnt) / 100;
			user_cnt_percent = CPUDRV_PM_USER_CNT_PERCENT(
			    cpudrv_pm_user_hwm, speeds, i);
			prev_spd->user_lwm =
			    (user_cnt_percent * prev_spd->quant_cnt) / 100;
		}
		prev_spd = cur_spd;
	}
	/* Slowest speed. Can't slow down any more */
	cur_spd->idle_hwm = UINT_MAX;
	cur_spd->user_lwm = -1;
#ifdef	DEBUG
	DPRINTF(D_PM_INIT, ("cpudrv_pm_init: instance %d: head_spd spd %d, "
	    "num_spd %d\n", ddi_get_instance(cpudsp->dip),
	    cpupm->head_spd->speed, cpupm->num_spd));
	for (cur_spd = cpupm->head_spd; cur_spd; cur_spd = cur_spd->down_spd) {
		DPRINTF(D_PM_INIT, ("cpudrv_pm_init: instance %d: speed %d, "
		    "down_spd spd %d, idle_hwm %d, user_lwm %d, "
		    "up_spd spd %d, idle_lwm %d, user_hwm %d, "
		    "quant_cnt %d\n", ddi_get_instance(cpudsp->dip),
		    cur_spd->speed,
		    (cur_spd->down_spd ? cur_spd->down_spd->speed : 0),
		    cur_spd->idle_hwm, cur_spd->user_lwm,
		    (cur_spd->up_spd ? cur_spd->up_spd->speed : 0),
		    cur_spd->idle_lwm, cur_spd->user_hwm,
		    cur_spd->quant_cnt));
	}
#endif	/* DEBUG */
	CPUDRV_PM_FREE_SPEEDS(speeds, nspeeds);
	return (DDI_SUCCESS);
}

/*
 * Free CPU power management data.
 */
static void
cpudrv_pm_free(cpudrv_devstate_t *cpudsp)
{
	cpudrv_pm_t *cpupm = &(cpudsp->cpudrv_pm);
	cpudrv_pm_spd_t *cur_spd, *next_spd;

	cur_spd = cpupm->head_spd;
	while (cur_spd) {
		next_spd = cur_spd->down_spd;
		kmem_free(cur_spd, sizeof (cpudrv_pm_spd_t));
		cur_spd = next_spd;
	}
	bzero(cpupm, sizeof (cpudrv_pm_t));
	cpudrv_pm_free_module(cpudsp);
}

/*
 * Create pm-components property.
 */
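/*
 * For reference, a sketch of what the resulting property might look
 * like for a hypothetical three-speed CPU (the exact entry strings are
 * produced by CPUDRV_PM_COMP_SPRINT and are platform-specific):
 *
 *	pm-components="NAME=CPU Speed", "1=<slowest>", "2=<middle>",
 *	    "3=<full>"
 *
 * pmc[0] names the component and pmc[i] describes power level i, with
 * higher levels corresponding to higher speeds.
 */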
static int
cpudrv_pm_comp_create(cpudrv_devstate_t *cpudsp)
{
	cpudrv_pm_t *cpupm = &(cpudsp->cpudrv_pm);
	cpudrv_pm_spd_t *cur_spd;
	char **pmc;
	int size;
	char name[] = "NAME=CPU Speed";
	int i, j;
	uint_t comp_spd;
	int result = DDI_FAILURE;

	pmc = kmem_zalloc((cpupm->num_spd + 1) * sizeof (char *), KM_SLEEP);
	size = CPUDRV_PM_COMP_SIZE();
	if (cpupm->num_spd > CPUDRV_PM_COMP_MAX_VAL) {
		cmn_err(CE_WARN, "cpudrv_pm_comp_create: instance %d: "
		    "number of speeds exceeded limits",
		    ddi_get_instance(cpudsp->dip));
		kmem_free(pmc, (cpupm->num_spd + 1) * sizeof (char *));
		return (result);
	}

	for (i = cpupm->num_spd, cur_spd = cpupm->head_spd; i > 0;
	    i--, cur_spd = cur_spd->down_spd) {
		cur_spd->pm_level = i;
		pmc[i] = kmem_zalloc((size * sizeof (char)), KM_SLEEP);
		comp_spd = CPUDRV_PM_COMP_SPEED(cpupm, cur_spd);
		if (comp_spd > CPUDRV_PM_COMP_MAX_VAL) {
			cmn_err(CE_WARN, "cpudrv_pm_comp_create: "
			    "instance %d: speed exceeded limits",
			    ddi_get_instance(cpudsp->dip));
			for (j = cpupm->num_spd; j >= i; j--) {
				kmem_free(pmc[j], size * sizeof (char));
			}
			kmem_free(pmc, (cpupm->num_spd + 1) *
			    sizeof (char *));
			return (result);
		}
		CPUDRV_PM_COMP_SPRINT(pmc[i], cpupm, cur_spd, comp_spd)
		DPRINTF(D_PM_COMP_CREATE, ("cpudrv_pm_comp_create: "
		    "instance %d: pm-components power level %d string '%s'\n",
		    ddi_get_instance(cpudsp->dip), i, pmc[i]));
	}
	pmc[0] = kmem_zalloc(sizeof (name), KM_SLEEP);
	(void) strcat(pmc[0], name);
	DPRINTF(D_PM_COMP_CREATE, ("cpudrv_pm_comp_create: instance %d: "
	    "pm-components component name '%s'\n",
	    ddi_get_instance(cpudsp->dip), pmc[0]));

	if (ddi_prop_update_string_array(DDI_DEV_T_NONE, cpudsp->dip,
	    "pm-components", pmc, cpupm->num_spd + 1) == DDI_PROP_SUCCESS) {
		result = DDI_SUCCESS;
	} else {
		cmn_err(CE_WARN, "cpudrv_pm_comp_create: instance %d: "
		    "can't create pm-components property",
		    ddi_get_instance(cpudsp->dip));
	}

	for (i = cpupm->num_spd; i > 0; i--) {
		kmem_free(pmc[i], size * sizeof (char));
	}
	kmem_free(pmc[0], sizeof (name));
	kmem_free(pmc, (cpupm->num_spd + 1) * sizeof (char *));
	return (result);
}

/*
 * Mark a component idle.
 */
#define	CPUDRV_PM_MONITOR_PM_IDLE_COMP(dip, cpupm) { \
	if ((cpupm)->pm_busycnt >= 1) { \
		if (pm_idle_component((dip), CPUDRV_PM_COMP_NUM) == \
		    DDI_SUCCESS) { \
			DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor: " \
			    "instance %d: pm_idle_component called\n", \
			    ddi_get_instance((dip)))); \
			(cpupm)->pm_busycnt--; \
		} else { \
			cmn_err(CE_WARN, "cpudrv_pm_monitor: instance %d: " \
			    "can't idle CPU component", \
			    ddi_get_instance((dip))); \
		} \
	} \
}

/*
 * Marks a component busy in both PM framework and driver state structure.
 */
#define	CPUDRV_PM_MONITOR_PM_BUSY_COMP(dip, cpupm) { \
	if ((cpupm)->pm_busycnt < 1) { \
		if (pm_busy_component((dip), CPUDRV_PM_COMP_NUM) == \
		    DDI_SUCCESS) { \
			DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor: " \
			    "instance %d: pm_busy_component called\n", \
			    ddi_get_instance((dip)))); \
			(cpupm)->pm_busycnt++; \
		} else { \
			cmn_err(CE_WARN, "cpudrv_pm_monitor: instance %d: " \
			    "can't busy CPU component", \
			    ddi_get_instance((dip))); \
		} \
	} \
}

/*
 * Marks a component busy and calls pm_raise_power().
 */
#define	CPUDRV_PM_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm, new_level) { \
	/* \
	 * Mark driver and PM framework busy first so the framework \
	 * doesn't try to bring the CPU to a lower speed when we need \
	 * to be at a higher speed. \
	 */ \
	CPUDRV_PM_MONITOR_PM_BUSY_COMP((dip), (cpupm)); \
	mutex_exit(&(cpudsp)->lock); \
	DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor: instance %d: " \
	    "pm_raise_power called to %d\n", ddi_get_instance((dip)), \
	    (new_level))); \
	if (pm_raise_power((dip), CPUDRV_PM_COMP_NUM, (new_level)) != \
	    DDI_SUCCESS) { \
		cmn_err(CE_WARN, "cpudrv_pm_monitor: instance %d: can't " \
		    "raise CPU power level", ddi_get_instance((dip))); \
	} \
	mutex_enter(&(cpudsp)->lock); \
}

/*
 * In order to monitor a CPU, we need to hold cpu_lock to access CPU
 * statistics.  Holding cpu_lock is not allowed from a callout routine.
 * We dispatch a taskq to do that job.
 */
static void
cpudrv_pm_monitor_disp(void *arg)
{
	cpudrv_devstate_t *cpudsp = (cpudrv_devstate_t *)arg;

	/*
	 * We are here because the last task has scheduled a timeout.
	 * The queue should be empty at this time.
	 */
	mutex_enter(&cpudsp->cpudrv_pm.timeout_lock);
	if (!taskq_dispatch(cpudsp->cpudrv_pm.tq, cpudrv_pm_monitor, arg,
	    TQ_NOSLEEP)) {
		mutex_exit(&cpudsp->cpudrv_pm.timeout_lock);
		DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor_disp: failed to "
		    "dispatch the cpudrv_pm_monitor taskq\n"));
		mutex_enter(&cpudsp->lock);
		CPUDRV_PM_MONITOR_INIT(cpudsp);
		mutex_exit(&cpudsp->lock);
		return;
	}
	cpudsp->cpudrv_pm.timeout_count++;
	mutex_exit(&cpudsp->cpudrv_pm.timeout_lock);
}
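
/*
 * A note on the timeout_count handshake above: timeout_count is bumped
 * under timeout_lock for every task successfully dispatched and is
 * decremented (with timeout_cv signalled) at the end of
 * cpudrv_pm_monitor().  CPUDRV_PM_MONITOR_FINI() relies on this pair to
 * wait out any monitor task still in flight after untimeout() returns.
 */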

/*
 * Monitors each CPU for the amount of time the idle thread was running
 * in the last quantum and arranges for the CPU to go to a lower or
 * higher speed.  Called at the time interval appropriate for the
 * current speed.  The time interval for normal speed is
 * CPUDRV_PM_QUANT_CNT_NORMAL.  The time interval for other speeds
 * (including unknown speed) is CPUDRV_PM_QUANT_CNT_OTHR.
 */
static void
cpudrv_pm_monitor(void *arg)
{
	cpudrv_devstate_t *cpudsp = (cpudrv_devstate_t *)arg;
	cpudrv_pm_t *cpupm;
	cpudrv_pm_spd_t *cur_spd, *new_spd;
	cpu_t *cp;
	dev_info_t *dip;
	uint_t idle_cnt, user_cnt, system_cnt;
	clock_t lbolt_cnt;
	hrtime_t msnsecs[NCMSTATES];
	boolean_t is_ready;

/*
 * Compute how many ticks a microstate advanced since the last quantum
 * and remember the new value; the clamp against the previous value
 * guards against the nsec-to-tick conversion going backwards.
 */
#define	GET_CPU_MSTATE_CNT(state, cnt) \
	msnsecs[state] = NSEC_TO_TICK(msnsecs[state]); \
	if (cpupm->lastquan_mstate[state] > msnsecs[state]) \
		msnsecs[state] = cpupm->lastquan_mstate[state]; \
	cnt = msnsecs[state] - cpupm->lastquan_mstate[state]; \
	cpupm->lastquan_mstate[state] = msnsecs[state]

	mutex_enter(&cpudsp->lock);
	cpupm = &(cpudsp->cpudrv_pm);
	if (cpupm->timeout_id == 0) {
		mutex_exit(&cpudsp->lock);
		goto do_return;
	}
	cur_spd = cpupm->cur_spd;
	dip = cpudsp->dip;

	/*
	 * We assume that a CPU is initialized and has a valid cpu_t
	 * structure if it is ready for cross calls.  If this changes,
	 * additional checks might be needed.
	 *
	 * Additionally, for x86 platforms we cannot power manage any one
	 * instance until all instances have been initialized.  That's
	 * because we don't know what the CPU domains look like until all
	 * instances have been initialized.
	 */
	is_ready = CPUDRV_PM_XCALL_IS_READY(cpudsp->cpu_id);
	if (!is_ready) {
		DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor: instance %d: "
		    "CPU not ready for x-calls\n", ddi_get_instance(dip)));
	} else if (!(is_ready = cpudrv_pm_all_instances_ready())) {
		DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor: instance %d: "
		    "waiting for all CPUs to be ready\n",
		    ddi_get_instance(dip)));
	}
	if (!is_ready) {
		/*
		 * Make sure that we are busy so that the framework doesn't
		 * try to bring us down in this situation.
		 */
		CPUDRV_PM_MONITOR_PM_BUSY_COMP(dip, cpupm);
		CPUDRV_PM_MONITOR_INIT(cpudsp);
		mutex_exit(&cpudsp->lock);
		goto do_return;
	}

	/*
	 * Make sure that we are still not at an unknown power level.
	 */
	if (cur_spd == NULL) {
		DPRINTF(D_PM_MONITOR, ("cpudrv_pm_monitor: instance %d: "
		    "cur_spd is unknown\n", ddi_get_instance(dip)));
		CPUDRV_PM_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm,
		    cpupm->targ_spd->pm_level);
		/*
		 * We just changed the speed.  Wait till at least the next
		 * call to this routine before proceeding ahead.
		 */
		CPUDRV_PM_MONITOR_INIT(cpudsp);
		mutex_exit(&cpudsp->lock);
		goto do_return;
	}

	mutex_enter(&cpu_lock);
	if ((cp = cpu_get(cpudsp->cpu_id)) == NULL) {
		mutex_exit(&cpu_lock);
		CPUDRV_PM_MONITOR_INIT(cpudsp);
		mutex_exit(&cpudsp->lock);
		cmn_err(CE_WARN, "cpudrv_pm_monitor: instance %d: can't get "
		    "cpu_t", ddi_get_instance(dip));
		goto do_return;
	}

	if (!cpupm->pm_started) {
		cpupm->pm_started = B_TRUE;
		set_supp_freqs(cp, cpupm);
	}

	get_cpu_mstate(cp, msnsecs);
	GET_CPU_MSTATE_CNT(CMS_IDLE, idle_cnt);
	GET_CPU_MSTATE_CNT(CMS_USER, user_cnt);
	GET_CPU_MSTATE_CNT(CMS_SYSTEM, system_cnt);

	/*
	 * We can't do anything when we have just switched to a state,
	 * because there is no valid timestamp.
	 */
	if (cpupm->lastquan_lbolt == 0) {
		cpupm->lastquan_lbolt = lbolt;
		mutex_exit(&cpu_lock);
		CPUDRV_PM_MONITOR_INIT(cpudsp);
		mutex_exit(&cpudsp->lock);
		goto do_return;
	}

	/*
	 * The various watermarks are based on this routine being called
	 * back exactly at the requested period.  This is not guaranteed
	 * because this routine is called from a taskq that is dispatched
	 * from a timeout routine.  Handle this by finding out how many
	 * ticks have elapsed since the last call (lbolt_cnt) and
	 * adjusting the idle_cnt based on the delay added to the
	 * requested period by timeout and taskq.
	 */
	lbolt_cnt = lbolt - cpupm->lastquan_lbolt;
	cpupm->lastquan_lbolt = lbolt;
	mutex_exit(&cpu_lock);
	/*
	 * The time taken between recording the current counts and
	 * arranging the next call of this routine is an error in our
	 * calculation.  We minimize the error by calling
	 * CPUDRV_PM_MONITOR_INIT() here instead of at the end of this
	 * routine.
	 */
	CPUDRV_PM_MONITOR_INIT(cpudsp);
	DPRINTF(D_PM_MONITOR_VERBOSE, ("cpudrv_pm_monitor: instance %d: "
	    "idle count %d, user count %d, system count %d, pm_level %d, "
	    "pm_busycnt %d\n", ddi_get_instance(dip), idle_cnt, user_cnt,
	    system_cnt, cur_spd->pm_level, cpupm->pm_busycnt));
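
	/*
	 * Illustration of the scaling done below (hypothetical numbers):
	 * with a quantum of quant_cnt = 100 ticks but lbolt_cnt = 125
	 * ticks actually elapsed, an observed idle count of 100 scales
	 * to (100 * 100) / 125 = 80, so scheduling delays do not inflate
	 * the idle and user estimates.
	 */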
#ifdef	DEBUG
	/*
	 * Notify that timeout and taskq have caused delays and that we
	 * need to scale our parameters accordingly.
	 *
	 * To get an accurate result, don't turn on other DPRINTFs with
	 * the following DPRINTF.  PROM calls generated by other
	 * DPRINTFs change the timing.
	 */
	if (lbolt_cnt > cur_spd->quant_cnt) {
		DPRINTF(D_PM_MONITOR_DELAY, ("cpudrv_pm_monitor: instance %d: "
		    "lbolt count %ld > quantum_count %u\n",
		    ddi_get_instance(dip), lbolt_cnt, cur_spd->quant_cnt));
	}
#endif	/* DEBUG */

	/*
	 * Adjust counts based on the delay added by timeout and taskq.
	 */
	idle_cnt = (idle_cnt * cur_spd->quant_cnt) / lbolt_cnt;
	user_cnt = (user_cnt * cur_spd->quant_cnt) / lbolt_cnt;
	if ((user_cnt > cur_spd->user_hwm) || (idle_cnt < cur_spd->idle_lwm &&
	    cur_spd->idle_blwm_cnt >= cpudrv_pm_idle_blwm_cnt_max)) {
		cur_spd->idle_blwm_cnt = 0;
		cur_spd->idle_bhwm_cnt = 0;
		/*
		 * In a normal situation, arrange to go to the next higher
		 * speed.  If we are running in special direct pm mode, we
		 * just stay at the current speed.
		 */
		if (cur_spd == cur_spd->up_spd || cpudrv_direct_pm) {
			CPUDRV_PM_MONITOR_PM_BUSY_COMP(dip, cpupm);
		} else {
			new_spd = cur_spd->up_spd;
			CPUDRV_PM_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm,
			    new_spd->pm_level);
		}
	} else if (((user_cnt <= cur_spd->user_lwm) &&
	    (idle_cnt >= cur_spd->idle_hwm)) || !CPU_ACTIVE(cp)) {
		cur_spd->idle_blwm_cnt = 0;
		cur_spd->idle_bhwm_cnt = 0;
		/*
		 * Arrange to go to the next lower speed by informing the
		 * power management framework of our idle status.
		 */
		CPUDRV_PM_MONITOR_PM_IDLE_COMP(dip, cpupm);
	} else {
		/*
		 * If we are between the idle watermarks and have not
		 * been here enough consecutive times to be considered
		 * busy, just increment the count and return.
		 */
		if ((idle_cnt < cur_spd->idle_hwm) &&
		    (idle_cnt >= cur_spd->idle_lwm) &&
		    (cur_spd->idle_bhwm_cnt < cpudrv_pm_idle_bhwm_cnt_max)) {
			cur_spd->idle_blwm_cnt = 0;
			cur_spd->idle_bhwm_cnt++;
			mutex_exit(&cpudsp->lock);
			goto do_return;
		}
		if (idle_cnt < cur_spd->idle_lwm) {
			cur_spd->idle_blwm_cnt++;
			cur_spd->idle_bhwm_cnt = 0;
		}
		/*
		 * Arrange to stay at the current speed.
		 */
		CPUDRV_PM_MONITOR_PM_BUSY_COMP(dip, cpupm);
	}
	mutex_exit(&cpudsp->lock);
do_return:
	mutex_enter(&cpupm->timeout_lock);
	ASSERT(cpupm->timeout_count > 0);
	cpupm->timeout_count--;
	cv_signal(&cpupm->timeout_cv);
	mutex_exit(&cpupm->timeout_lock);
}