/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2020 Oxide Computer Company
 */

#include <sys/hpet_acpi.h>
#include <sys/hpet.h>
#include <sys/bitmap.h>
#include <sys/inttypes.h>
#include <sys/time.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/apic.h>
#include <sys/callb.h>
#include <sys/clock.h>
#include <sys/archsystm.h>
#include <sys/cpupart.h>
#include <sys/x86_archext.h>
#include <sys/prom_debug.h>

static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags);
static boolean_t hpet_install_proxy(void);
static boolean_t hpet_callback(int code);
static boolean_t hpet_cpr(int code);
static boolean_t hpet_resume(void);
static void hpet_cst_callback(uint32_t code);
static boolean_t hpet_deep_idle_config(int code);
static int hpet_validate_table(ACPI_TABLE_HPET *hpet_table);
static boolean_t hpet_checksum_table(unsigned char *table, unsigned int len);
static void *hpet_memory_map(ACPI_TABLE_HPET *hpet_table);
static int hpet_start_main_counter(hpet_info_t *hip);
static int hpet_stop_main_counter(hpet_info_t *hip);
static uint64_t hpet_read_main_counter_value(hpet_info_t *hip);
static uint64_t hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value);
static uint64_t hpet_read_gen_cap(hpet_info_t *hip);
static uint64_t hpet_read_gen_config(hpet_info_t *hip);
static uint64_t hpet_read_gen_intrpt_stat(hpet_info_t *hip);
static uint64_t hpet_read_timer_N_config(hpet_info_t *hip, uint_t n);
static hpet_TN_conf_cap_t hpet_convert_timer_N_config(uint64_t conf);
static void hpet_write_gen_config(hpet_info_t *hip, uint64_t l);
static void hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l);
static void hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l);
static void hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l);
static void hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n);
static void hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n);
static int hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip);
static int hpet_timer_available(uint32_t allocated_timers, uint32_t n);
static void hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n);
static void hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n,
    uint32_t interrupt);
static uint_t hpet_isr(caddr_t, caddr_t);
static uint32_t hpet_install_interrupt_handler(avfunc func, int vector);
static void hpet_uninstall_interrupt_handler(void);
static void hpet_expire_all(void);
static boolean_t hpet_guaranteed_schedule(hrtime_t required_wakeup_time);
static boolean_t hpet_use_hpet_timer(hrtime_t *expire);
static void hpet_use_lapic_timer(hrtime_t expire);
static void hpet_init_proxy_data(void);

/*
 * hpet_state_lock is used to synchronize disabling/enabling deep c-states
 * and to synchronize suspend/resume.
 */
static kmutex_t hpet_state_lock;
static struct hpet_state {
	boolean_t	proxy_installed;	/* CBE proxy interrupt setup */
	boolean_t	cpr;			/* currently in CPR */
	boolean_t	cpu_deep_idle;		/* user enable/disable */
	boolean_t	uni_cstate;		/* disable if only one cstate */
} hpet_state = { B_FALSE, B_FALSE, B_TRUE, B_TRUE};

uint64_t hpet_spin_check = HPET_SPIN_CHECK;
uint64_t hpet_spin_timeout = HPET_SPIN_TIMEOUT;
uint64_t hpet_idle_spin_timeout = HPET_SPIN_TIMEOUT;
uint64_t hpet_isr_spin_timeout = HPET_SPIN_TIMEOUT;

static kmutex_t hpet_proxy_lock;	/* lock for lAPIC proxy data */
/*
 * hpet_proxy_users is a per-cpu array.
 */
static hpet_proxy_t *hpet_proxy_users;	/* one per CPU */


ACPI_TABLE_HPET *hpet_table;		/* ACPI HPET table */
hpet_info_t hpet_info;			/* Human readable Information */

/*
 * Provide HPET access from unix.so.
 * Set up pointers to access symbols in pcplusmp.
 */
static void
hpet_establish_hooks(void)
{
	hpet.install_proxy = &hpet_install_proxy;
	hpet.callback = &hpet_callback;
	hpet.use_hpet_timer = &hpet_use_hpet_timer;
	hpet.use_lapic_timer = &hpet_use_lapic_timer;
}

/*
 * Get the ACPI "HPET" table.
 * acpi_probe() calls this function from mp_startup before drivers are loaded.
 * acpi_probe() verified the system is using ACPI before calling this.
 *
 * There may be more than one ACPI HPET table (Itanium only?).
 * Intel's HPET spec defines each timer block to have up to 32 counters and
 * be 1024 bytes long.  There can be more than one timer block of 32 counters.
 * Each timer block would have an additional ACPI HPET table.
 * Typical x86 systems today only have 1 HPET with 3 counters.
 * On x86 we only consume HPET table "1" for now.
 */
int
hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
{
	extern hrtime_t tsc_read(void);
	extern int idle_cpu_no_deep_c;
	extern int cpuid_deep_cstates_supported(void);
	void *la;
	uint64_t ret;
	uint_t num_timers;
	uint_t ti;

	(void) memset(&hpet_info, 0, sizeof (hpet_info));
	hpet.supported = HPET_NO_SUPPORT;

	if ((get_hwenv() & HW_XEN_HVM) != 0) {
		/*
		 * In some AWS EC2 guests, though the HPET is advertised via
		 * ACPI, programming the interrupt on the non-legacy timer can
		 * result in an immediate reset of the instance.  It is not
		 * currently possible to tell whether this is an instance with
		 * broken HPET emulation or not, so we simply disable it across
		 * the board.
		 */
		PRM_POINT("will not program HPET in Xen HVM");
		return (DDI_FAILURE);
	}

	if (idle_cpu_no_deep_c ||
	    !cpuid_deep_cstates_supported()) {
		/*
		 * If Deep C-States are disabled or not supported, then we do
		 * not need to program the HPET at all as it will not
		 * subsequently be used.
		 */
		PRM_POINT("no need to program the HPET");
		return (DDI_FAILURE);
	}

	hpet_establish_hooks();

	/*
	 * Get HPET ACPI table 1.
	 */
	PRM_POINT("AcpiGetTable() HPET #1");
	if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1,
	    (ACPI_TABLE_HEADER **)&hpet_table))) {
		cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table");
		return (DDI_FAILURE);
	}

	if (hpet_validate_table(hpet_table) != AE_OK) {
		cmn_err(CE_NOTE, "!hpet_acpi: invalid HPET table");
		return (DDI_FAILURE);
	}

	PRM_POINT("hpet_memory_map()");
	la = hpet_memory_map(hpet_table);
	PRM_DEBUG(la);
	if (la == NULL) {
		cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed");
		return (DDI_FAILURE);
	}
	hpet_info.logical_address = la;

	PRM_POINT("hpet_read_gen_cap()");
	ret = hpet_read_gen_cap(&hpet_info);
	PRM_DEBUG(ret);
	hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret);
	hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret);
	hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret);
	hpet_info.gen_cap.count_size_cap = HPET_GCAP_CNT_SIZE_CAP(ret);
	/*
	 * Hardware contains the last timer's number.
	 * Add 1 to get the number of timers.
	 */
	hpet_info.gen_cap.num_tim_cap = HPET_GCAP_NUM_TIM_CAP(ret) + 1;
	hpet_info.gen_cap.rev_id = HPET_GCAP_REV_ID(ret);

	if (hpet_info.gen_cap.counter_clk_period > HPET_MAX_CLK_PERIOD) {
		cmn_err(CE_NOTE, "!hpet_acpi: COUNTER_CLK_PERIOD 0x%lx > 0x%lx",
		    (long)hpet_info.gen_cap.counter_clk_period,
		    (long)HPET_MAX_CLK_PERIOD);
		return (DDI_FAILURE);
	}

	num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap;
	PRM_DEBUG(num_timers);
	if ((num_timers < 3) || (num_timers > 32)) {
		cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers "
		    "%lx", (long)num_timers);
		return (DDI_FAILURE);
	}
	hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc(
	    num_timers * sizeof (uint64_t), KM_SLEEP);

	PRM_POINT("hpet_read_gen_config()");
	ret = hpet_read_gen_config(&hpet_info);
	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);

	/*
	 * illumos does not use the HPET Legacy Replacement Route capabilities.
	 * This feature has been off by default on test systems.
	 * The HPET spec does not specify if Legacy Replacement Route is
	 * on or off by default, so we explicitly set it off here.
	 * It should not matter which mode the HPET is in since we use
	 * the first available non-legacy replacement timer: timer 2.
	 */
	PRM_POINT("hpet_set_leg_rt_cnf()");
	(void) hpet_set_leg_rt_cnf(&hpet_info, 0);

	PRM_POINT("hpet_read_gen_config() again");
	ret = hpet_read_gen_config(&hpet_info);
	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);

	hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info);
	hpet_info.main_counter_value =
	    hpet_read_main_counter_value(&hpet_info);

	PRM_POINT("disable timer loop...");
	for (ti = 0; ti < num_timers; ++ti) {
		ret = hpet_read_timer_N_config(&hpet_info, ti);
		/*
		 * Make sure no timers are enabled (think fast reboot or
		 * virtual hardware).
		 */
		if (ret & HPET_TIMER_N_INT_ENB_CNF_BIT) {
			hpet_disable_timer(&hpet_info, ti);
			ret &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
		}

		hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret);
	}
	PRM_POINT("disable timer loop complete");

	/*
	 * Be aware the Main Counter may need to be initialized in the future
	 * if it is used for more than just Deep C-State support.
	 * The HPET's Main Counter does not need to be initialized to a
	 * specific value before starting it for use to wake up CPUs from
	 * Deep C-States.
	 */
	PRM_POINT("hpet_start_main_counter()");
	if (hpet_start_main_counter(&hpet_info) != AE_OK) {
		cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed");
		return (DDI_FAILURE);
	}

	hpet_info.period = hpet_info.gen_cap.counter_clk_period;
	/*
	 * Read main counter twice to record HPET latency for debugging.
	 */
	PRM_POINT("TSC and HPET reads:");
	hpet_info.tsc[0] = tsc_read();
	hpet_info.hpet_main_counter_reads[0] =
	    hpet_read_main_counter_value(&hpet_info);
	hpet_info.tsc[1] = tsc_read();
	hpet_info.hpet_main_counter_reads[1] =
	    hpet_read_main_counter_value(&hpet_info);
	hpet_info.tsc[2] = tsc_read();

	PRM_DEBUG(hpet_info.hpet_main_counter_reads[0]);
	PRM_DEBUG(hpet_info.hpet_main_counter_reads[1]);
	PRM_DEBUG(hpet_info.tsc[0]);
	PRM_DEBUG(hpet_info.tsc[1]);
	PRM_DEBUG(hpet_info.tsc[2]);

	ret = hpet_read_gen_config(&hpet_info);
	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);

	/*
	 * HPET main counter reads are supported now.
	 */
	hpet.supported = HPET_TIMER_SUPPORT;

	return (hpet_init_proxy(hpet_vect, hpet_flags));
}

void
hpet_acpi_fini(void)
{
	if (hpet.supported == HPET_NO_SUPPORT)
		return;
	if (hpet.supported >= HPET_TIMER_SUPPORT)
		(void) hpet_stop_main_counter(&hpet_info);
	if (hpet.supported > HPET_TIMER_SUPPORT)
		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
}

/*
 * Do initial setup to use a HPET timer as a proxy for Deep C-state stalled
 * LAPIC Timers.  Get a free HPET timer that supports I/O APIC routed
 * interrupt.  Set up data to handle the timer's ISR, and add the timer's
 * interrupt.
 *
 * The DDI cannot be used to allocate the HPET timer's interrupt.
 * ioapic_init_intr() in mp_platform_common() later sets up the I/O APIC
 * to handle the HPET timer's interrupt.
 *
 * Note: FSB (MSI) interrupts are not currently supported by Intel HPETs as
 * of ICH9.  The HPET spec allows for MSI.  In the future MSI may be
 * preferred.
 */
static int
hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
{
	PRM_POINT("hpet_get_IOAPIC_intr_capable_timer()");
	if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) {
		cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed.");
		return (DDI_FAILURE);
	}

	hpet_init_proxy_data();

	PRM_POINT("hpet_install_interrupt_handler()");
	if (hpet_install_interrupt_handler(&hpet_isr,
	    hpet_info.cstate_timer.intr) != AE_OK) {
		cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed.");
		return (DDI_FAILURE);
	}
	*hpet_vect = hpet_info.cstate_timer.intr;
	hpet_flags->intr_el = INTR_EL_LEVEL;
	hpet_flags->intr_po = INTR_PO_ACTIVE_HIGH;
	hpet_flags->bustype = BUS_PCI;	/* we *do* conform to PCI */

	/*
	 * Avoid a possibly stuck interrupt by programming the HPET's timer
	 * here before the I/O APIC is programmed to handle this interrupt.
	 */
	PRM_POINT("hpet_timer_set_up()");
	hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
	    hpet_info.cstate_timer.intr);
	PRM_POINT("back from hpet_timer_set_up()");

	/*
	 * All HPET functionality is supported.
	 */
	hpet.supported = HPET_FULL_SUPPORT;
	PRM_POINT("HPET full support");
	return (DDI_SUCCESS);
}

/*
 * Called by kernel if it can support Deep C-States.
 */
static boolean_t
hpet_install_proxy(void)
{
	if (hpet_state.proxy_installed == B_TRUE)
		return (B_TRUE);

	if (hpet.supported != HPET_FULL_SUPPORT)
		return (B_FALSE);

	hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
	hpet_state.proxy_installed = B_TRUE;

	return (B_TRUE);
}

/*
 * Remove the interrupt that was added with add_avintr() in
 * hpet_install_interrupt_handler().
 */
static void
hpet_uninstall_interrupt_handler(void)
{
	rem_avintr(NULL, CBE_HIGH_PIL, &hpet_isr, hpet_info.cstate_timer.intr);
}

static int
hpet_validate_table(ACPI_TABLE_HPET *hpet_table)
{
	ACPI_TABLE_HEADER *table_header = (ACPI_TABLE_HEADER *)hpet_table;

	if (table_header->Length != sizeof (ACPI_TABLE_HPET)) {
		cmn_err(CE_WARN, "!hpet_validate_table: Length %lx != sizeof ("
		    "ACPI_TABLE_HPET) %lx.",
		    (unsigned long)((ACPI_TABLE_HEADER *)hpet_table)->Length,
		    (unsigned long)sizeof (ACPI_TABLE_HPET));
		return (AE_ERROR);
	}

	if (!ACPI_COMPARE_NAME(table_header->Signature, ACPI_SIG_HPET)) {
		cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET table "
		    "signature");
		return (AE_ERROR);
	}

	if (!hpet_checksum_table((unsigned char *)hpet_table,
	    (unsigned int)table_header->Length)) {
		cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET checksum");
		return (AE_ERROR);
	}

	/*
	 * Sequence should be table number - 1.  We are using table 1.
	 */
	if (hpet_table->Sequence != HPET_TABLE_1 - 1) {
		cmn_err(CE_WARN, "!hpet_validate_table: Invalid Sequence %lx",
		    (long)hpet_table->Sequence);
		return (AE_ERROR);
	}

	return (AE_OK);
}

static boolean_t
hpet_checksum_table(unsigned char *table, unsigned int length)
{
	unsigned char checksum = 0;
	int i;

	for (i = 0; i < length; ++i, ++table)
		checksum += *table;

	return (checksum == 0);
}

static void *
hpet_memory_map(ACPI_TABLE_HPET *hpet_table)
{
	return (AcpiOsMapMemory(hpet_table->Address.Address, HPET_SIZE));
}

static int
hpet_start_main_counter(hpet_info_t *hip)
{
	uint64_t *gcr_ptr;
	uint64_t gcr;

	gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
	gcr = *gcr_ptr;

	gcr |= HPET_GCFR_ENABLE_CNF;
	*gcr_ptr = gcr;
	gcr = *gcr_ptr;

	return (gcr & HPET_GCFR_ENABLE_CNF ? AE_OK : ~AE_OK);
}

static int
hpet_stop_main_counter(hpet_info_t *hip)
{
	uint64_t *gcr_ptr;
	uint64_t gcr;

	gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
	gcr = *gcr_ptr;

	gcr &= ~HPET_GCFR_ENABLE_CNF;
	*gcr_ptr = gcr;
	gcr = *gcr_ptr;

	return (gcr & HPET_GCFR_ENABLE_CNF ? ~AE_OK : AE_OK);
}

/*
 * Set the Legacy Replacement Route bit.
 * This should be called before setting up timers.
 * The HPET specification is silent regarding setting this after timers are
 * programmed.
 */
static uint64_t
hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value)
{
	uint64_t gen_conf = hpet_read_gen_config(hip);

	switch (new_value) {
	case 0:
		gen_conf &= ~HPET_GCFR_LEG_RT_CNF;
		break;

	case HPET_GCFR_LEG_RT_CNF:
		gen_conf |= HPET_GCFR_LEG_RT_CNF;
		break;

	default:
		ASSERT(new_value == 0 || new_value == HPET_GCFR_LEG_RT_CNF);
		break;
	}
	hpet_write_gen_config(hip, gen_conf);
	return (gen_conf);
}

static uint64_t
hpet_read_gen_cap(hpet_info_t *hip)
{
	return (*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address));
}

static uint64_t
hpet_read_gen_config(hpet_info_t *hip)
{
	return (*(uint64_t *)
	    HPET_GEN_CONFIG_ADDRESS(hip->logical_address));
}

static uint64_t
hpet_read_gen_intrpt_stat(hpet_info_t *hip)
{
	hip->gen_intrpt_stat = *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(
	    hip->logical_address);
	return (hip->gen_intrpt_stat);
}

static uint64_t
hpet_read_timer_N_config(hpet_info_t *hip, uint_t n)
{
	uint64_t conf = *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
	    hip->logical_address, n);
	hip->timer_n_config[n] = hpet_convert_timer_N_config(conf);
	return (conf);
}

static hpet_TN_conf_cap_t
hpet_convert_timer_N_config(uint64_t conf)
{
	hpet_TN_conf_cap_t cc = { 0 };

	cc.int_route_cap = HPET_TIMER_N_INT_ROUTE_CAP(conf);
	cc.fsb_int_del_cap = HPET_TIMER_N_FSB_INT_DEL_CAP(conf);
	cc.fsb_int_en_cnf = HPET_TIMER_N_FSB_EN_CNF(conf);
	cc.int_route_cnf = HPET_TIMER_N_INT_ROUTE_CNF(conf);
	cc.mode32_cnf = HPET_TIMER_N_MODE32_CNF(conf);
	cc.val_set_cnf = HPET_TIMER_N_VAL_SET_CNF(conf);
	cc.size_cap = HPET_TIMER_N_SIZE_CAP(conf);
	cc.per_int_cap = HPET_TIMER_N_PER_INT_CAP(conf);
	cc.type_cnf = HPET_TIMER_N_TYPE_CNF(conf);
	cc.int_enb_cnf = HPET_TIMER_N_INT_ENB_CNF(conf);
	cc.int_type_cnf = HPET_TIMER_N_INT_TYPE_CNF(conf);

	return (cc);
}

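/*
 * Read the HPET main counter value.
 *
 * A 32-bit main counter is read with a single 32-bit load.  A 64-bit main
 * counter is read as two 32-bit halves; the high half is re-read until it is
 * stable so that a carry out of the low half between the two loads cannot
 * produce a torn 64-bit value.
 */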
static uint64_t
hpet_read_main_counter_value(hpet_info_t *hip)
{
	uint64_t value;
	uint32_t *counter;
	uint32_t high1, high2, low;

	counter = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address);

	/*
	 * 32-bit main counters
	 */
	if (hip->gen_cap.count_size_cap == 0) {
		value = (uint64_t)*counter;
		hip->main_counter_value = value;
		return (value);
	}

	/*
	 * HPET spec claims a 64-bit read can be split into two 32-bit reads
	 * by the hardware connection to the HPET.
	 */
	high2 = counter[1];
	do {
		high1 = high2;
		low = counter[0];
		high2 = counter[1];
	} while (high2 != high1);

	value = ((uint64_t)high1 << 32) | low;
	hip->main_counter_value = value;
	return (value);
}

static void
hpet_write_gen_config(hpet_info_t *hip, uint64_t l)
{
	*(uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address) = l;
}

static void
hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l)
{
	*(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(hip->logical_address) = l;
}

static void
hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t conf)
{
	/*
	 * The configuration register size is not affected by the size
	 * capability; it is always a 64-bit value.  The top 32-bit half of
	 * this register is always read-only so we constrain our write to the
	 * bottom half.
	 */
	uint32_t *confaddr = (uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
	    hip->logical_address, n);
	uint32_t conf32 = 0xFFFFFFFF & conf;

	PRM_DEBUG(n);
	PRM_DEBUG(conf);
	PRM_DEBUG(conf32);

	*confaddr = conf32;

	PRM_POINT("write done");
}

static void
hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l)
{
	*(uint64_t *)HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n) = l;
}

static void
hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n)
{
	uint64_t l;

	l = hpet_read_timer_N_config(hip, timer_n);
	l &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
	hpet_write_timer_N_config(hip, timer_n, l);
}

static void
hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n)
{
	uint64_t l;

	l = hpet_read_timer_N_config(hip, timer_n);
	l |= HPET_TIMER_N_INT_ENB_CNF_BIT;
	hpet_write_timer_N_config(hip, timer_n, l);
}

/*
 * Add the interrupt handler for I/O APIC interrupt number (interrupt line).
 *
 * The I/O APIC line (vector) is programmed in ioapic_init_intr() called
 * from apic_picinit() psm_ops apic_ops entry point after we return from
 * apic_init() psm_ops entry point.
 */
static uint32_t
hpet_install_interrupt_handler(avfunc func, int vector)
{
	uint32_t retval;

	retval = add_avintr(NULL, CBE_HIGH_PIL, func, "HPET Timer",
	    vector, NULL, NULL, NULL, NULL);
	if (retval == 0) {
		cmn_err(CE_WARN, "!hpet_acpi: add_avintr() failed");
		return (AE_BAD_PARAMETER);
	}
	return (AE_OK);
}

/*
 * The HPET timers specify which I/O APIC interrupts they can be routed to.
 * Find the first available non-legacy-replacement timer and its I/O APIC irq.
 * Supported I/O APIC IRQs are specified in the int_route_cap bitmap in each
 * timer's timer_n_config register.
 */
static int
hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip)
{
	int timer;
	int intr;

	for (timer = HPET_FIRST_NON_LEGACY_TIMER;
	    timer < hip->gen_cap.num_tim_cap; ++timer) {
		if (!hpet_timer_available(hip->allocated_timers, timer))
			continue;

		intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1;

		PRM_DEBUG(timer);
		PRM_DEBUG(intr);

		if (intr >= 0) {
			hpet_timer_alloc(&hip->allocated_timers, timer);
			hip->cstate_timer.timer = timer;
			hip->cstate_timer.intr = intr;
			return (timer);
		}
	}

	return (-1);
}

/*
 * Mark this timer as used.
 */
static void
hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n)
{
	*allocated_timers |= 1 << n;
}

/*
 * Check if this timer is available.
 * No mutual exclusion because only one thread uses this.
 */
static int
hpet_timer_available(uint32_t allocated_timers, uint32_t n)
{
	return ((allocated_timers & (1 << n)) == 0);
}

/*
 * Set up timer N to route its interrupt to the I/O APIC.
 */
static void
hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
{
	uint64_t conf;

	PRM_DEBUG(timer_n);
	PRM_DEBUG(interrupt);

	PRM_POINT("hpet_read_timer_N_config()");
	conf = hpet_read_timer_N_config(hip, timer_n);
	PRM_DEBUG(conf);

	/*
	 * Caller is required to verify this interrupt route is supported.
	 */
	ASSERT(HPET_TIMER_N_INT_ROUTE_CAP(conf) & (1 << interrupt));

	conf &= ~HPET_TIMER_N_FSB_EN_CNF_BIT;	/* use IOAPIC */
	conf |= HPET_TIMER_N_INT_ROUTE_SHIFT(interrupt);
	conf &= ~HPET_TIMER_N_TYPE_CNF_BIT;	/* non periodic */
	conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;	/* disabled */
	conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT;	/* Level Triggered */

	PRM_POINT("hpet_write_timer_N_config()");
	PRM_DEBUG(conf);
	hpet_write_timer_N_config(hip, timer_n, conf);
	PRM_POINT("back from hpet_write_timer_N_config()");
}

/*
 * The HPET's Main Counter is not stopped before programming an HPET timer.
 * This will allow the HPET to be used as a time source.
 * The programmed timer interrupt may occur before this function returns.
 * Callers must block interrupts before calling this function if they must
 * guarantee the interrupt is handled after this function returns.
 *
 * Return 0 if the main counter is less than the timer's comparator after
 * enabling the timer.  The interrupt was programmed, but it may fire before
 * this returns.
 * Return !0 if the main counter is greater than the comparator after
 * enabling the timer.  In other words: the timer will not fire in the
 * future, and we do not know whether it already fired.
 *
 * delta is in HPET ticks.
 *
 * Writing a 64-bit value to a 32-bit register will "wrap around".
 * A 32-bit HPET timer will wrap around in a little over 5 minutes.
 */
int
hpet_timer_program(hpet_info_t *hip, uint32_t timer, uint64_t delta)
{
	uint64_t time, program;

	program = hpet_read_main_counter_value(hip);
	program += delta;
	hpet_write_timer_N_comp(hip, timer, program);

	time = hpet_read_main_counter_value(hip);
	if (time < program)
		return (AE_OK);

	return (AE_TIME);
}

/*
 * CPR and power policy-change callback entry point.
 */
boolean_t
hpet_callback(int code)
{
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
		/*FALLTHROUGH*/
	case PM_ENABLE_CPU_DEEP_IDLE:
		/*FALLTHROUGH*/
	case PM_DISABLE_CPU_DEEP_IDLE:
		return (hpet_deep_idle_config(code));

	case CB_CODE_CPR_RESUME:
		/*FALLTHROUGH*/
	case CB_CODE_CPR_CHKPT:
		return (hpet_cpr(code));

	case CST_EVENT_MULTIPLE_CSTATES:
		hpet_cst_callback(CST_EVENT_MULTIPLE_CSTATES);
		return (B_TRUE);

	case CST_EVENT_ONE_CSTATE:
		hpet_cst_callback(CST_EVENT_ONE_CSTATE);
		return (B_TRUE);

	default:
		cmn_err(CE_NOTE, "!hpet_callback: invalid code %d\n", code);
		return (B_FALSE);
	}
}

/*
 * According to the HPET spec 1.0a: the Operating System must save and restore
 * HPET event timer hardware context through ACPI sleep state transitions.
 * Timer registers (including the main counter) may not be preserved through
 * ACPI S3, S4, or S5 sleep states.  This code does not support S1 or S2.
 *
 * Current HPET state is already in hpet.supported and
 * hpet_state.proxy_installed.  hpet_info contains the proxy interrupt HPET
 * Timer state.
 *
 * Future projects beware: the HPET Main Counter is undefined after ACPI S3 or
 * S4, and it is not saved/restored here.  Future projects cannot expect the
 * Main Counter to be monotonically (or accurately) increasing across CPR.
 *
 * Note: the CPR Checkpoint path later calls pause_cpus() which ensures all
 * CPUs are awake and in a spin loop before the system suspends.  The HPET is
 * not needed for Deep C-state wakeup when CPUs are in cpu_pause().
 * It is safe to leave the HPET running as the system suspends; we just
 * disable the timer from generating interrupts here.
 */
static boolean_t
hpet_cpr(int code)
{
	ulong_t intr, dead_count = 0;
	hrtime_t dead = gethrtime() + hpet_spin_timeout;
	boolean_t ret = B_TRUE;

	mutex_enter(&hpet_state_lock);
	switch (code) {
	case CB_CODE_CPR_CHKPT:
		if (hpet_state.proxy_installed == B_FALSE)
			break;

		hpet_state.cpr = B_TRUE;

		intr = intr_clear();
		while (!mutex_tryenter(&hpet_proxy_lock)) {
			/*
			 * spin
			 */
			intr_restore(intr);
			if (dead_count++ > hpet_spin_check) {
				dead_count = 0;
				if (gethrtime() > dead) {
					hpet_state.cpr = B_FALSE;
					mutex_exit(&hpet_state_lock);
					cmn_err(CE_NOTE, "!hpet_cpr: deadman");
					return (B_FALSE);
				}
			}
			intr = intr_clear();
		}
		hpet_expire_all();
		mutex_exit(&hpet_proxy_lock);
		intr_restore(intr);

		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
		break;

	case CB_CODE_CPR_RESUME:
		if (hpet_resume() == B_TRUE)
			hpet_state.cpr = B_FALSE;
		else
			cmn_err(CE_NOTE, "!hpet_resume failed.");
		break;

	default:
		cmn_err(CE_NOTE, "!hpet_cpr: invalid code %d\n", code);
		ret = B_FALSE;
		break;
	}
	mutex_exit(&hpet_state_lock);
	return (ret);
}

/*
 * Assume the HPET stopped in Suspend state and timer state was lost.
 */
static boolean_t
hpet_resume(void)
{
	if (hpet.supported != HPET_TIMER_SUPPORT)
		return (B_TRUE);

	/*
	 * The HPET spec does not specify if Legacy Replacement Route is
	 * on or off by default, so we set it off here.
	 */
	(void) hpet_set_leg_rt_cnf(&hpet_info, 0);

	if (hpet_start_main_counter(&hpet_info) != AE_OK) {
		cmn_err(CE_NOTE, "!hpet_resume: start main counter failed");
		hpet.supported = HPET_NO_SUPPORT;
		if (hpet_state.proxy_installed == B_TRUE) {
			hpet_state.proxy_installed = B_FALSE;
			hpet_uninstall_interrupt_handler();
		}
		return (B_FALSE);
	}

	if (hpet_state.proxy_installed == B_FALSE)
		return (B_TRUE);

	hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
	    hpet_info.cstate_timer.intr);
	if (hpet_state.cpu_deep_idle == B_TRUE)
		hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);

	return (B_TRUE);
}

/*
 * Callback to enable/disable Deep C-States based on power.conf setting.
 */
static boolean_t
hpet_deep_idle_config(int code)
{
	ulong_t intr, dead_count = 0;
	hrtime_t dead = gethrtime() + hpet_spin_timeout;
	boolean_t ret = B_TRUE;

	mutex_enter(&hpet_state_lock);
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
		/*FALLTHROUGH*/
	case PM_ENABLE_CPU_DEEP_IDLE:

		if (hpet_state.cpu_deep_idle == B_TRUE)
			break;

		if (hpet_state.proxy_installed == B_FALSE) {
			ret = B_FALSE;	/* Deep C-States not supported */
			break;
		}

		hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
		hpet_state.cpu_deep_idle = B_TRUE;
		break;

	case PM_DISABLE_CPU_DEEP_IDLE:

		if ((hpet_state.cpu_deep_idle == B_FALSE) ||
		    (hpet_state.proxy_installed == B_FALSE))
			break;

		/*
		 * The order of these operations is important to avoid
		 * lost wakeups: Set a flag to refuse all future LAPIC Timer
		 * proxy requests, then wake up all CPUs from deep C-state,
		 * and finally disable the HPET interrupt-generating timer.
		 */
		hpet_state.cpu_deep_idle = B_FALSE;

		intr = intr_clear();
		while (!mutex_tryenter(&hpet_proxy_lock)) {
			/*
			 * spin
			 */
			intr_restore(intr);
			if (dead_count++ > hpet_spin_check) {
				dead_count = 0;
				if (gethrtime() > dead) {
					hpet_state.cpu_deep_idle = B_TRUE;
					mutex_exit(&hpet_state_lock);
					cmn_err(CE_NOTE,
					    "!hpet_deep_idle_config: deadman");
					return (B_FALSE);
				}
			}
			intr = intr_clear();
		}
		hpet_expire_all();
		mutex_exit(&hpet_proxy_lock);
		intr_restore(intr);

		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
		break;

	default:
		cmn_err(CE_NOTE, "!hpet_deep_idle_config: invalid code %d\n",
		    code);
		ret = B_FALSE;
		break;
	}
	mutex_exit(&hpet_state_lock);

	return (ret);
}

/*
 * Callback for _CST c-state change notifications.
 */
static void
hpet_cst_callback(uint32_t code)
{
	ulong_t intr, dead_count = 0;
	hrtime_t dead = gethrtime() + hpet_spin_timeout;

	switch (code) {
	case CST_EVENT_ONE_CSTATE:
		hpet_state.uni_cstate = B_TRUE;
		intr = intr_clear();
		while (!mutex_tryenter(&hpet_proxy_lock)) {
			/*
			 * spin
			 */
			intr_restore(intr);
			if (dead_count++ > hpet_spin_check) {
				dead_count = 0;
				if (gethrtime() > dead) {
					hpet_expire_all();
					cmn_err(CE_NOTE,
					    "!hpet_cst_callback: deadman");
					return;
				}
			}
			intr = intr_clear();
		}
		hpet_expire_all();
		mutex_exit(&hpet_proxy_lock);
		intr_restore(intr);
		break;

	case CST_EVENT_MULTIPLE_CSTATES:
		hpet_state.uni_cstate = B_FALSE;
		break;

	default:
		cmn_err(CE_NOTE, "!hpet_cst_callback: invalid code %d\n", code);
		break;
	}
}

/*
 * Interrupt Service Routine for HPET I/O-APIC-generated interrupts.
 * Used to wake up CPUs from Deep C-state when their Local APIC Timer stops.
 * This ISR runs on one CPU which pokes other CPUs out of Deep C-state as
 * needed.
 */
static uint_t
hpet_isr(caddr_t arg __unused, caddr_t arg1 __unused)
{
	uint64_t timer_status;
	uint64_t timer_mask;
	ulong_t intr, dead_count = 0;
	hrtime_t dead = gethrtime() + hpet_isr_spin_timeout;

	timer_mask = HPET_INTR_STATUS_MASK(hpet_info.cstate_timer.timer);

	/*
	 * We are using a level-triggered interrupt.
	 * HPET sets timer's General Interrupt Status Register bit N.
	 * ISR checks this bit to see if it needs servicing.
	 * ISR then clears this bit by writing 1 to that bit.
	 */
	timer_status = hpet_read_gen_intrpt_stat(&hpet_info);
	if (!(timer_status & timer_mask))
		return (DDI_INTR_UNCLAIMED);
	hpet_write_gen_intrpt_stat(&hpet_info, timer_mask);

	/*
	 * Do not touch ISR data structures before checking the HPET's General
	 * Interrupt Status register.  The General Interrupt Status register
	 * will not be set by hardware until after timer interrupt generation
	 * is enabled by software.  Software allocates necessary data
	 * structures before enabling timer interrupts.  ASSERT the software
	 * data structures required to handle this interrupt are initialized.
	 */
	ASSERT(hpet_proxy_users != NULL);

	/*
	 * CPUs in deep c-states do not enable interrupts until after
	 * performing idle cleanup which includes descheduling themselves from
	 * the HPET.  The CPU running this ISR will NEVER find itself in the
	 * proxy list.  A lost wakeup may occur if this is false.
	 */
	ASSERT(hpet_proxy_users[CPU->cpu_id] == HPET_INFINITY);

	/*
	 * Higher level interrupts may deadlock with CPUs going idle if this
	 * ISR is preempted while holding hpet_proxy_lock.
	 */
	intr = intr_clear();
	while (!mutex_tryenter(&hpet_proxy_lock)) {
		/*
		 * spin
		 */
		intr_restore(intr);
		if (dead_count++ > hpet_spin_check) {
			dead_count = 0;
			if (gethrtime() > dead) {
				hpet_expire_all();
				return (DDI_INTR_CLAIMED);
			}
		}
		intr = intr_clear();
	}
	(void) hpet_guaranteed_schedule(HPET_INFINITY);
	mutex_exit(&hpet_proxy_lock);
	intr_restore(intr);

	return (DDI_INTR_CLAIMED);
}

/*
 * Used when disabling the HPET Timer interrupt.  CPUs in Deep C-state must be
 * woken up because they can no longer rely on the HPET's Timer to wake them.
 * We do not need to wait for CPUs to wake up.
 */
static void
hpet_expire_all(void)
{
	processorid_t id;

	for (id = 0; id < max_ncpus; ++id) {
		if (hpet_proxy_users[id] != HPET_INFINITY) {
			hpet_proxy_users[id] = HPET_INFINITY;
			if (id != CPU->cpu_id)
				poke_cpu(id);
		}
	}
}

/*
 * To avoid missed wakeups this function must guarantee either the HPET timer
 * was successfully programmed to the next expire time or there are no waiting
 * CPUs.
 *
 * Callers cannot enter C2 or deeper if the HPET could not be programmed to
 * generate its next interrupt to happen at required_wakeup_time or sooner.
 * Returns B_TRUE if the HPET was programmed to interrupt by
 * required_wakeup_time, B_FALSE if not.
 */
static boolean_t
hpet_guaranteed_schedule(hrtime_t required_wakeup_time)
{
	hrtime_t now, next_proxy_time;
	processorid_t id, next_proxy_id;
	int proxy_timer = hpet_info.cstate_timer.timer;
	boolean_t done = B_FALSE;

	ASSERT(mutex_owned(&hpet_proxy_lock));

	/*
	 * Loop until we successfully program the HPET,
	 * or no CPUs are scheduled to use the HPET as a proxy.
	 */
	do {
		/*
		 * Wake all CPUs that expired before now.
		 * Find the next CPU to wake up and next HPET program time.
		 */
		now = gethrtime();
		next_proxy_time = HPET_INFINITY;
		next_proxy_id = CPU->cpu_id;
		for (id = 0; id < max_ncpus; ++id) {
			if (hpet_proxy_users[id] < now) {
				hpet_proxy_users[id] = HPET_INFINITY;
				if (id != CPU->cpu_id)
					poke_cpu(id);
			} else if (hpet_proxy_users[id] < next_proxy_time) {
				next_proxy_time = hpet_proxy_users[id];
				next_proxy_id = id;
			}
		}

		if (next_proxy_time == HPET_INFINITY) {
			done = B_TRUE;
			/*
			 * There are currently no CPUs using the HPET's Timer
			 * as a proxy for their LAPIC Timer.  The HPET's Timer
			 * does not need to be programmed.
			 *
			 * Letting the HPET timer wrap around to the current
			 * time is the longest possible timeout.
			 * A 64-bit timer will wrap around in ~ 2^44 seconds.
			 * A 32-bit timer will wrap around in ~ 2^12 seconds.
			 *
			 * Disabling the HPET's timer interrupt requires a
			 * (relatively expensive) write to the HPET.
			 * Instead we do nothing.
			 *
			 * We are gambling some CPU will attempt to enter a
			 * deep c-state before the timer wraps around.
			 * We assume one spurious interrupt in a little over an
			 * hour has less performance impact than writing to the
			 * HPET's timer disable bit every time all CPUs wake up
			 * from deep c-state.
			 */

		} else {
			/*
			 * Idle CPUs disable interrupts before programming the
			 * HPET to prevent a lost wakeup if the HPET
			 * interrupts the idle cpu before it can enter a
			 * Deep C-State.
			 */
			if (hpet_timer_program(&hpet_info, proxy_timer,
			    HRTIME_TO_HPET_TICKS(next_proxy_time - gethrtime()))
			    != AE_OK) {
				/*
				 * We could not program the HPET to wake up the
				 * next CPU.  We must wake the CPU ourselves to
				 * avoid a lost wakeup.
				 */
				hpet_proxy_users[next_proxy_id] = HPET_INFINITY;
				if (next_proxy_id != CPU->cpu_id)
					poke_cpu(next_proxy_id);
			} else {
				done = B_TRUE;
			}
		}

	} while (!done);

	return (next_proxy_time <= required_wakeup_time);
}

/*
 * Use an HPET timer to act as this CPU's proxy local APIC timer.
 * Used in deep c-states C2 and above while the CPU's local APIC timer stalls.
 * Called by the idle thread with interrupts enabled.
 * Always returns with interrupts disabled.
 *
 * There are 3 possible outcomes from this function:
 * 1. The Local APIC Timer was already disabled before this function was
 *    called.
 *	LAPIC TIMER	: disabled
 *	HPET		: not scheduled to wake this CPU
 *	*lapic_expire	: (hrtime_t)HPET_INFINITY
 *	Returns		: B_TRUE
 * 2. Successfully programmed the HPET to act as a LAPIC Timer proxy.
 *	LAPIC TIMER	: disabled
 *	HPET		: scheduled to wake this CPU
 *	*lapic_expire	: hrtime_t when LAPIC timer would have expired
 *	Returns		: B_TRUE
 * 3. Failed to program the HPET to act as a LAPIC Timer proxy.
 *	LAPIC TIMER	: enabled
 *	HPET		: not scheduled to wake this CPU
 *	*lapic_expire	: (hrtime_t)HPET_INFINITY
 *	Returns		: B_FALSE
 *
 * The idle thread cannot enter Deep C-State in case 3.
 * The idle thread must re-enable & re-program the LAPIC_TIMER in case 2.
 */
static boolean_t
hpet_use_hpet_timer(hrtime_t *lapic_expire)
{
	extern hrtime_t apic_timer_stop_count(void);
	extern void apic_timer_restart(hrtime_t);
	hrtime_t now, expire, dead;
	uint64_t lapic_count, dead_count;
	cpupart_t *cpu_part;
	processorid_t cpu_sid;
	processorid_t cpu_id = CPU->cpu_id;
	processorid_t id;
	boolean_t rslt;
	boolean_t hset_update;

	cpu_part = CPU->cpu_part;
	cpu_sid = CPU->cpu_seqid;

	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);

	/*
	 * A critical section exists between when the HPET is programmed
	 * to interrupt the CPU and when this CPU enters an idle state.
	 * Interrupts must be blocked during that time to prevent lost
	 * CBE wakeup interrupts from either LAPIC or HPET.
	 *
	 * Must block interrupts before acquiring hpet_proxy_lock to prevent
	 * a deadlock with the ISR if the ISR runs on this CPU after the
	 * idle thread acquires the mutex but before it clears interrupts.
	 */
	ASSERT(!interrupts_enabled());
	lapic_count = apic_timer_stop_count();
	now = gethrtime();
	dead = now + hpet_idle_spin_timeout;
	*lapic_expire = expire = now + lapic_count;
	if (lapic_count == (hrtime_t)-1) {
		/*
		 * LAPIC timer is currently disabled.
		 * Will not use the HPET as a LAPIC Timer proxy.
		 */
		*lapic_expire = (hrtime_t)HPET_INFINITY;
		return (B_TRUE);
	}

	/*
	 * Serialize hpet_proxy data structure manipulation.
	 */
	dead_count = 0;
	while (!mutex_tryenter(&hpet_proxy_lock)) {
		/*
		 * spin
		 */
		apic_timer_restart(expire);
		sti();
		cli();

		if (dead_count++ > hpet_spin_check) {
			dead_count = 0;
			hset_update = (((CPU->cpu_flags & CPU_OFFLINE) == 0) &&
			    (ncpus > 1));
			if (hset_update &&
			    !bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
				*lapic_expire = (hrtime_t)HPET_INFINITY;
				return (B_FALSE);
			}
		}

		lapic_count = apic_timer_stop_count();
		now = gethrtime();
		*lapic_expire = expire = now + lapic_count;
		if (lapic_count == (hrtime_t)-1) {
			/*
			 * LAPIC timer is currently disabled.
			 * Will not use the HPET as a LAPIC Timer proxy.
			 */
			*lapic_expire = (hrtime_t)HPET_INFINITY;
			return (B_TRUE);
		}
		if (now > dead) {
			apic_timer_restart(expire);
			*lapic_expire = (hrtime_t)HPET_INFINITY;
			return (B_FALSE);
		}
	}

	if ((hpet_state.cpr == B_TRUE) ||
	    (hpet_state.cpu_deep_idle == B_FALSE) ||
	    (hpet_state.proxy_installed == B_FALSE) ||
	    (hpet_state.uni_cstate == B_TRUE)) {
		mutex_exit(&hpet_proxy_lock);
		apic_timer_restart(expire);
		*lapic_expire = (hrtime_t)HPET_INFINITY;
		return (B_FALSE);
	}

	hpet_proxy_users[cpu_id] = expire;

	/*
	 * We are done if another cpu is scheduled on the HPET with an
	 * expire time before us.  The next HPET interrupt has been programmed
	 * to fire before our expire time.
	 */
	for (id = 0; id < max_ncpus; ++id) {
		if ((hpet_proxy_users[id] <= expire) && (id != cpu_id)) {
			mutex_exit(&hpet_proxy_lock);
			return (B_TRUE);
		}
	}

	/*
	 * We are the next lAPIC to expire.
	 * Program the HPET with our expire time.
	 */
	rslt = hpet_guaranteed_schedule(expire);
	mutex_exit(&hpet_proxy_lock);

	if (rslt == B_FALSE) {
		apic_timer_restart(expire);
		*lapic_expire = (hrtime_t)HPET_INFINITY;
	}

	return (rslt);
}

/*
 * Called by the idle thread when waking up from Deep C-state before enabling
 * interrupts.  With an array data structure it is faster to always remove
 * ourselves from the array without checking whether the HPET ISR has already
 * removed us.
 *
 * We use a lazy algorithm for removing CPUs from the HPET's schedule.
 * We do not reprogram the HPET here because this CPU has real work to do.
 * On an idle system the CPU was probably woken up by the HPET's ISR.
 * On a heavily loaded system CPUs are not going into Deep C-state.
 * On a moderately loaded system another CPU will usually enter Deep C-state
 * and reprogram the HPET before the HPET fires with our wakeup.
 */
static void
hpet_use_lapic_timer(hrtime_t expire)
{
	extern void apic_timer_restart(hrtime_t);
	processorid_t cpu_id = CPU->cpu_id;

	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
	ASSERT(!interrupts_enabled());

	hpet_proxy_users[cpu_id] = HPET_INFINITY;

	/*
	 * Do not enable a LAPIC Timer that was initially disabled.
	 */
	if (expire != HPET_INFINITY)
		apic_timer_restart(expire);
}

/*
 * Initialize data structure to keep track of CPUs using HPET as a proxy for
 * their stalled local APIC timer.  For now this is just an array.
 */
static void
hpet_init_proxy_data(void)
{
	processorid_t id;

	/*
	 * Use max_ncpus for hot plug compliance.
	 */
	hpet_proxy_users = kmem_zalloc(max_ncpus * sizeof (*hpet_proxy_users),
	    KM_SLEEP);

	/*
	 * Unused entries always contain HPET_INFINITY.
	 */
	for (id = 0; id < max_ncpus; ++id)
		hpet_proxy_users[id] = HPET_INFINITY;
}