1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2020 Oxide Computer Company
24 * Copyright 2020 Joyent, Inc.
25 */
26
27 #include <sys/hpet_acpi.h>
28 #include <sys/hpet.h>
29 #include <sys/bitmap.h>
30 #include <sys/inttypes.h>
31 #include <sys/time.h>
32 #include <sys/sunddi.h>
33 #include <sys/ksynch.h>
34 #include <sys/apic.h>
35 #include <sys/callb.h>
36 #include <sys/clock.h>
37 #include <sys/archsystm.h>
38 #include <sys/cpupart.h>
39 #include <sys/x86_archext.h>
40 #include <sys/prom_debug.h>
41 #include <sys/psm.h>
42 #include <sys/bootconf.h>
43
44 static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags);
45 static boolean_t hpet_install_proxy(void);
46 static boolean_t hpet_callback(int code);
47 static boolean_t hpet_cpr(int code);
48 static boolean_t hpet_resume(void);
49 static void hpet_cst_callback(uint32_t code);
50 static boolean_t hpet_deep_idle_config(int code);
51 static int hpet_validate_table(ACPI_TABLE_HPET *hpet_table);
52 static boolean_t hpet_checksum_table(unsigned char *table, unsigned int len);
53 static void *hpet_memory_map(ACPI_TABLE_HPET *hpet_table);
54 static int hpet_start_main_counter(hpet_info_t *hip);
55 static int hpet_stop_main_counter(hpet_info_t *hip);
56 static uint64_t hpet_read_main_counter_value(hpet_info_t *hip);
57 static uint64_t hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value);
58 static uint64_t hpet_read_gen_cap(hpet_info_t *hip);
59 static uint64_t hpet_read_gen_config(hpet_info_t *hip);
60 static uint64_t hpet_read_gen_intrpt_stat(hpet_info_t *hip);
61 static uint64_t hpet_read_timer_N_config(hpet_info_t *hip, uint_t n);
62 static hpet_TN_conf_cap_t hpet_convert_timer_N_config(uint64_t conf);
63 static void hpet_write_gen_config(hpet_info_t *hip, uint64_t l);
64 static void hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l);
65 static void hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l);
66 static void hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l);
67 static void hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n);
68 static void hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n);
69 static int hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip);
70 static int hpet_timer_available(uint32_t allocated_timers, uint32_t n);
71 static void hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n);
72 static void hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n,
73 uint32_t interrupt);
74 static uint_t hpet_isr(caddr_t, caddr_t);
75 static uint32_t hpet_install_interrupt_handler(avfunc func, int vector);
76 static void hpet_uninstall_interrupt_handler(void);
77 static void hpet_expire_all(void);
78 static boolean_t hpet_guaranteed_schedule(hrtime_t required_wakeup_time);
79 static boolean_t hpet_use_hpet_timer(hrtime_t *expire);
80 static void hpet_use_lapic_timer(hrtime_t expire);
81 static void hpet_init_proxy_data(void);
82
/*
 * hpet_state_lock is used to synchronize disabling/enabling deep c-states
 * and to synchronize suspend/resume.
 */
static kmutex_t		hpet_state_lock;
static struct hpet_state {
	boolean_t	proxy_installed;	/* CBE proxy interrupt setup */
	boolean_t	cpr;			/* currently in CPR */
	boolean_t	cpu_deep_idle;		/* user enable/disable */
	boolean_t	uni_cstate;		/* disable if only one cstate */
} hpet_state = { B_FALSE, B_FALSE, B_TRUE, B_TRUE};

/*
 * Spin tunables.  hpet_cpr() checks its deadline only once every
 * hpet_spin_check failed lock attempts and gives up once gethrtime() passes
 * its start time plus hpet_spin_timeout.  The idle and ISR timeouts are
 * presumably the equivalents for those paths; their users are not in this
 * part of the file.
 */
uint64_t hpet_spin_check = HPET_SPIN_CHECK;
uint64_t hpet_spin_timeout = HPET_SPIN_TIMEOUT;
uint64_t hpet_idle_spin_timeout = HPET_SPIN_TIMEOUT;
uint64_t hpet_isr_spin_timeout = HPET_SPIN_TIMEOUT;

static kmutex_t		hpet_proxy_lock;	/* lock for lAPIC proxy data */
/*
 * hpet_proxy_users is a per-cpu array.
 */
static hpet_proxy_t	*hpet_proxy_users;	/* one per CPU */

/*
 * Set by hpet_early_init() on entry and cleared only when it completes; once
 * set, later calls to hpet_early_init() fail immediately.
 */
static boolean_t	hpet_early_init_failed;

ACPI_TABLE_HPET		*hpet_table;		/* ACPI HPET table */
hpet_info_t		hpet_info;		/* Human readable Information */

/* LAPIC timer stop/restart entry points handed to hpet_acpi_init(). */
static hrtime_t (*apic_timer_stop_count_fn)(void);
static void (*apic_timer_restart_fn)(hrtime_t);
113
114 /*
115 * Provide HPET access from unix.so.
116 * Set up pointers to access symbols in pcplusmp.
117 */
118 static void
hpet_establish_hooks(void)119 hpet_establish_hooks(void)
120 {
121 hpet.install_proxy = &hpet_install_proxy;
122 hpet.callback = &hpet_callback;
123 hpet.use_hpet_timer = &hpet_use_hpet_timer;
124 hpet.use_lapic_timer = &hpet_use_lapic_timer;
125 }
126
127 /*
128 * Initialize the HPET early in the boot process if it is both present
129 * and needed to calibrate the TSC. This initializes the HPET enough to
130 * allow the main counter to be read for calibration purposes.
131 *
132 * If the HPET is not needed early in the boot process, but is needed later
133 * by ACPI, this will be called at that time to start the initialization
134 * process.
135 */
136 int
hpet_early_init(void)137 hpet_early_init(void)
138 {
139 extern hrtime_t tsc_read(void);
140 void *la;
141 uint64_t ret;
142 uint_t num_timers;
143 uint_t ti;
144
145 PRM_POINT("Initializing the HPET...");
146
147 /* If we tried and failed, don't try again. */
148 if (hpet_early_init_failed) {
149 PRM_POINT("Prior HPET initialization failed, aborting...");
150 return (DDI_FAILURE);
151 }
152
153 /* No need to initialize again if we already succeeded */
154 if (hpet.supported >= HPET_TIMER_SUPPORT)
155 return (DDI_SUCCESS);
156
157 (void) memset(&hpet_info, 0, sizeof (hpet_info));
158 hpet.supported = HPET_NO_SUPPORT;
159
160 /*
161 * Once called, we assume initialization fails unless we complete all
162 * the early init tasks.
163 */
164 hpet_early_init_failed = B_TRUE;
165
166 if ((get_hwenv() & HW_XEN_HVM) != 0) {
167 /*
168 * In some AWS EC2 guests, though the HPET is advertised via
169 * ACPI, programming the interrupt on the non-legacy timer can
170 * result in an immediate reset of the instance. It is not
171 * currently possible to tell whether this is an instance with
172 * broken HPET emulation or not, so we simply disable it across
173 * the board.
174 */
175 PRM_POINT("will not program HPET in Xen HVM");
176 return (DDI_FAILURE);
177 }
178
179 /*
180 * If there are any HPET tables, we should have mapped and stored
181 * the address of the first table while building up the boot
182 * properties.
183 *
184 * Systems with a large numbers of HPET timer blocks may have
185 * multiple HPET tables (each HPET table can contain at most 32 timer
186 * blocks). Most x86 systems have 1 HPET table with 3 counters (it
187 * appears multiple HPET timers was largely seen on Itanium systems).
188 * illumos currently only uses the first HPET table, so we do not need
189 * to be concerned about additional tables.
190 */
191 if (BOP_GETPROPLEN(bootops, "hpet-table") != 8 ||
192 BOP_GETPROP(bootops, "hpet-table", (void *)&hpet_table) != 0) {
193 cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table");
194 return (DDI_FAILURE);
195 }
196
197 if (hpet_validate_table(hpet_table) != AE_OK) {
198 cmn_err(CE_NOTE, "!hpet_acpi: invalid HPET table");
199 return (DDI_FAILURE);
200 }
201
202 PRM_POINT("hpet_memory_map()");
203 la = hpet_memory_map(hpet_table);
204 PRM_DEBUG(la);
205 if (la == NULL) {
206 cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed");
207 return (DDI_FAILURE);
208 }
209 hpet_info.logical_address = la;
210
211 PRM_POINT("hpet_read_gen_cap()");
212 ret = hpet_read_gen_cap(&hpet_info);
213 PRM_DEBUG(ret);
214 hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret);
215 hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret);
216 hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret);
217 hpet_info.gen_cap.count_size_cap = HPET_GCAP_CNT_SIZE_CAP(ret);
218 /*
219 * Hardware contains the last timer's number.
220 * Add 1 to get the number of timers.
221 */
222 hpet_info.gen_cap.num_tim_cap = HPET_GCAP_NUM_TIM_CAP(ret) + 1;
223 hpet_info.gen_cap.rev_id = HPET_GCAP_REV_ID(ret);
224
225 if (hpet_info.gen_cap.counter_clk_period > HPET_MAX_CLK_PERIOD) {
226 cmn_err(CE_NOTE, "!hpet_acpi: COUNTER_CLK_PERIOD 0x%lx > 0x%lx",
227 (long)hpet_info.gen_cap.counter_clk_period,
228 (long)HPET_MAX_CLK_PERIOD);
229 return (DDI_FAILURE);
230 }
231
232 num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap;
233 PRM_DEBUG(num_timers);
234 if ((num_timers < 3) || (num_timers > 32)) {
235 cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers "
236 "%lx", (long)num_timers);
237 return (DDI_FAILURE);
238 }
239 hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc(
240 num_timers * sizeof (uint64_t), KM_SLEEP);
241
242 PRM_POINT("hpet_read_gen_config()");
243 ret = hpet_read_gen_config(&hpet_info);
244 hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
245 hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
246
247 /*
248 * illumos does not use the HPET Legacy Replacement Route capabilities.
249 * This feature has been off by default on test systems.
250 * The HPET spec does not specify if Legacy Replacement Route is
251 * on or off by default, so we explicitly set it off here.
252 * It should not matter which mode the HPET is in since we use
253 * the first available non-legacy replacement timer: timer 2.
254 */
255 PRM_POINT("hpet_read_gen_config()");
256 (void) hpet_set_leg_rt_cnf(&hpet_info, 0);
257
258 PRM_POINT("hpet_read_gen_config() again");
259 ret = hpet_read_gen_config(&hpet_info);
260 hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
261 hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
262
263 hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info);
264 hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info);
265
266 PRM_POINT("disable timer loop...");
267 for (ti = 0; ti < num_timers; ++ti) {
268 ret = hpet_read_timer_N_config(&hpet_info, ti);
269 /*
270 * Make sure no timers are enabled (think fast reboot or
271 * virtual hardware).
272 */
273 if (ret & HPET_TIMER_N_INT_ENB_CNF_BIT) {
274 hpet_disable_timer(&hpet_info, ti);
275 ret &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
276 }
277
278 hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret);
279 }
280 PRM_POINT("disable timer loop complete");
281
282 /*
283 * Be aware the Main Counter may need to be initialized in the future
284 * if it is used for more than just Deep C-State support.
285 * The HPET's Main Counter does not need to be initialize to a specific
286 * value before starting it for use to wake up CPUs from Deep C-States.
287 */
288 PRM_POINT("hpet_start_main_counter()");
289 if (hpet_start_main_counter(&hpet_info) != AE_OK) {
290 cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed");
291 return (DDI_FAILURE);
292 }
293
294 hpet_info.period = hpet_info.gen_cap.counter_clk_period;
295 /*
296 * Read main counter twice to record HPET latency for debugging.
297 */
298 PRM_POINT("TSC and HPET reads:");
299 hpet_info.tsc[0] = tsc_read();
300 hpet_info.hpet_main_counter_reads[0] =
301 hpet_read_main_counter_value(&hpet_info);
302 hpet_info.tsc[1] = tsc_read();
303 hpet_info.hpet_main_counter_reads[1] =
304 hpet_read_main_counter_value(&hpet_info);
305 hpet_info.tsc[2] = tsc_read();
306
307 PRM_DEBUG(hpet_info.hpet_main_counter_reads[0]);
308 PRM_DEBUG(hpet_info.hpet_main_counter_reads[1]);
309 PRM_DEBUG(hpet_info.tsc[0]);
310 PRM_DEBUG(hpet_info.tsc[1]);
311 PRM_DEBUG(hpet_info.tsc[2]);
312
313 ret = hpet_read_gen_config(&hpet_info);
314 hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
315 hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
316
317 /*
318 * HPET main counter reads are supported now.
319 */
320 hpet.supported = HPET_TIMER_SUPPORT;
321 hpet_early_init_failed = B_FALSE;
322
323 PRM_POINT("HPET main counter configured for reading...");
324 return (DDI_SUCCESS);
325 }
326
327 /*
328 * Called by acpi_init() to set up HPET interrupts and fully initialize the
329 * HPET.
330 */
331 int
hpet_acpi_init(int * hpet_vect,iflag_t * hpet_flags,hrtime_t (* stop_fn)(void),void (* restart_fn)(hrtime_t))332 hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags, hrtime_t (*stop_fn)(void),
333 void (*restart_fn)(hrtime_t))
334 {
335 extern int idle_cpu_no_deep_c;
336 extern int cpuid_deep_cstates_supported(void);
337
338 PRM_POINT("Completing HPET initialization...");
339
340 if (hpet_early_init() != DDI_SUCCESS) {
341 PRM_POINT("Early HPET initialization failed; aborting...");
342 return (DDI_FAILURE);
343 }
344
345 /*
346 * These functions reside in either pcplusmp or apix, and allow
347 * the HPET to proxy the LAPIC.
348 */
349 apic_timer_stop_count_fn = stop_fn;
350 apic_timer_restart_fn = restart_fn;
351
352 hpet_establish_hooks();
353
354 if (idle_cpu_no_deep_c ||
355 !cpuid_deep_cstates_supported()) {
356 /*
357 * If Deep C-States are disabled or not supported, then we do
358 * not need to program the HPET at all as it will not
359 * subsequently be used.
360 */
361 PRM_POINT("no need to program the HPET");
362 return (DDI_FAILURE);
363 }
364
365 return (hpet_init_proxy(hpet_vect, hpet_flags));
366 }
367
368 void
hpet_acpi_fini(void)369 hpet_acpi_fini(void)
370 {
371 if (hpet.supported == HPET_NO_SUPPORT)
372 return;
373 if (hpet.supported >= HPET_TIMER_SUPPORT)
374 (void) hpet_stop_main_counter(&hpet_info);
375 if (hpet.supported > HPET_TIMER_SUPPORT)
376 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
377 }
378
379 /*
380 * Do initial setup to use a HPET timer as a proxy for Deep C-state stalled
381 * LAPIC Timers. Get a free HPET timer that supports I/O APIC routed interrupt.
382 * Setup data to handle the timer's ISR, and add the timer's interrupt.
383 *
384 * The ddi cannot be use to allocate the HPET timer's interrupt.
385 * ioapic_init_intr() in mp_platform_common() later sets up the I/O APIC
386 * to handle the HPET timer's interrupt.
387 *
388 * Note: FSB (MSI) interrupts are not currently supported by Intel HPETs as of
389 * ICH9. The HPET spec allows for MSI. In the future MSI may be prefered.
390 */
391 static int
hpet_init_proxy(int * hpet_vect,iflag_t * hpet_flags)392 hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
393 {
394 PRM_POINT("hpet_get_IOAPIC_intr_capable_timer()");
395 if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) {
396 cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed.");
397 return (DDI_FAILURE);
398 }
399
400 hpet_init_proxy_data();
401
402 PRM_POINT("hpet_install_interrupt_handler()");
403 if (hpet_install_interrupt_handler(&hpet_isr,
404 hpet_info.cstate_timer.intr) != AE_OK) {
405 cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed.");
406 return (DDI_FAILURE);
407 }
408 *hpet_vect = hpet_info.cstate_timer.intr;
409 hpet_flags->intr_el = INTR_EL_LEVEL;
410 hpet_flags->intr_po = INTR_PO_ACTIVE_HIGH;
411 hpet_flags->bustype = BUS_PCI; /* we *do* conform to PCI */
412
413 /*
414 * Avoid a possibly stuck interrupt by programing the HPET's timer here
415 * before the I/O APIC is programmed to handle this interrupt.
416 */
417 PRM_POINT("hpet_timer_set_up()");
418 hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
419 hpet_info.cstate_timer.intr);
420 PRM_POINT("back from hpet_timer_set_up()");
421
422 /*
423 * All HPET functionality is supported.
424 */
425 hpet.supported = HPET_FULL_SUPPORT;
426 PRM_POINT("HPET full support");
427 return (DDI_SUCCESS);
428 }
429
430 /*
431 * Called by kernel if it can support Deep C-States.
432 */
433 static boolean_t
hpet_install_proxy(void)434 hpet_install_proxy(void)
435 {
436 if (hpet_state.proxy_installed == B_TRUE)
437 return (B_TRUE);
438
439 if (hpet.supported != HPET_FULL_SUPPORT)
440 return (B_FALSE);
441
442 hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
443 hpet_state.proxy_installed = B_TRUE;
444
445 return (B_TRUE);
446 }
447
448 /*
449 * Remove the interrupt that was added with add_avintr() in
450 * hpet_install_interrupt_handler().
451 */
452 static void
hpet_uninstall_interrupt_handler(void)453 hpet_uninstall_interrupt_handler(void)
454 {
455 rem_avintr(NULL, CBE_HIGH_PIL, &hpet_isr, hpet_info.cstate_timer.intr);
456 }
457
458 static int
hpet_validate_table(ACPI_TABLE_HPET * hpet_table)459 hpet_validate_table(ACPI_TABLE_HPET *hpet_table)
460 {
461 ACPI_TABLE_HEADER *table_header = (ACPI_TABLE_HEADER *)hpet_table;
462
463 if (table_header->Length != sizeof (ACPI_TABLE_HPET)) {
464 cmn_err(CE_WARN, "!hpet_validate_table: Length %lx != sizeof ("
465 "ACPI_TABLE_HPET) %lx.",
466 (unsigned long)((ACPI_TABLE_HEADER *)hpet_table)->Length,
467 (unsigned long)sizeof (ACPI_TABLE_HPET));
468 return (AE_ERROR);
469 }
470
471 if (!ACPI_COMPARE_NAME(table_header->Signature, ACPI_SIG_HPET)) {
472 cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET table "
473 "signature");
474 return (AE_ERROR);
475 }
476
477 if (!hpet_checksum_table((unsigned char *)hpet_table,
478 (unsigned int)table_header->Length)) {
479 cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET checksum");
480 return (AE_ERROR);
481 }
482
483 /*
484 * Sequence should be table number - 1. We are using table 1.
485 */
486 if (hpet_table->Sequence != HPET_TABLE_1 - 1) {
487 cmn_err(CE_WARN, "!hpet_validate_table: Invalid Sequence %lx",
488 (long)hpet_table->Sequence);
489 return (AE_ERROR);
490 }
491
492 return (AE_OK);
493 }
494
495 static boolean_t
hpet_checksum_table(unsigned char * table,unsigned int length)496 hpet_checksum_table(unsigned char *table, unsigned int length)
497 {
498 unsigned char checksum = 0;
499 int i;
500
501 for (i = 0; i < length; ++i, ++table)
502 checksum += *table;
503
504 return (checksum == 0);
505 }
506
507 static void *
hpet_memory_map(ACPI_TABLE_HPET * hpet_table)508 hpet_memory_map(ACPI_TABLE_HPET *hpet_table)
509 {
510 return (psm_map_new(hpet_table->Address.Address, (size_t)HPET_SIZE,
511 PSM_PROT_WRITE | PSM_PROT_READ));
512 }
513
514 static int
hpet_start_main_counter(hpet_info_t * hip)515 hpet_start_main_counter(hpet_info_t *hip)
516 {
517 uint64_t *gcr_ptr;
518 uint64_t gcr;
519
520 gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
521 gcr = *gcr_ptr;
522
523 gcr |= HPET_GCFR_ENABLE_CNF;
524 *gcr_ptr = gcr;
525 gcr = *gcr_ptr;
526
527 return (gcr & HPET_GCFR_ENABLE_CNF ? AE_OK : ~AE_OK);
528 }
529
530 static int
hpet_stop_main_counter(hpet_info_t * hip)531 hpet_stop_main_counter(hpet_info_t *hip)
532 {
533 uint64_t *gcr_ptr;
534 uint64_t gcr;
535
536 gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
537 gcr = *gcr_ptr;
538
539 gcr &= ~HPET_GCFR_ENABLE_CNF;
540 *gcr_ptr = gcr;
541 gcr = *gcr_ptr;
542
543 return (gcr & HPET_GCFR_ENABLE_CNF ? ~AE_OK : AE_OK);
544 }
545
546 boolean_t
hpet_timer_is_readable(void)547 hpet_timer_is_readable(void)
548 {
549 return ((hpet.supported >= HPET_TIMER_SUPPORT) ? B_TRUE : B_FALSE);
550 }
551
552 uint64_t
hpet_read_timer(void)553 hpet_read_timer(void)
554 {
555 return (hpet_read_main_counter_value(&hpet_info));
556 }
557
558 /*
559 * Set the Legacy Replacement Route bit.
560 * This should be called before setting up timers.
561 * The HPET specification is silent regarding setting this after timers are
562 * programmed.
563 */
564 static uint64_t
hpet_set_leg_rt_cnf(hpet_info_t * hip,uint32_t new_value)565 hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value)
566 {
567 uint64_t gen_conf = hpet_read_gen_config(hip);
568
569 switch (new_value) {
570 case 0:
571 gen_conf &= ~HPET_GCFR_LEG_RT_CNF;
572 break;
573
574 case HPET_GCFR_LEG_RT_CNF:
575 gen_conf |= HPET_GCFR_LEG_RT_CNF;
576 break;
577
578 default:
579 ASSERT(new_value == 0 || new_value == HPET_GCFR_LEG_RT_CNF);
580 break;
581 }
582 hpet_write_gen_config(hip, gen_conf);
583 return (gen_conf);
584 }
585
586 static uint64_t
hpet_read_gen_cap(hpet_info_t * hip)587 hpet_read_gen_cap(hpet_info_t *hip)
588 {
589 return (*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address));
590 }
591
592 static uint64_t
hpet_read_gen_config(hpet_info_t * hip)593 hpet_read_gen_config(hpet_info_t *hip)
594 {
595 return (*(uint64_t *)
596 HPET_GEN_CONFIG_ADDRESS(hip->logical_address));
597 }
598
599 static uint64_t
hpet_read_gen_intrpt_stat(hpet_info_t * hip)600 hpet_read_gen_intrpt_stat(hpet_info_t *hip)
601 {
602 hip->gen_intrpt_stat = *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(
603 hip->logical_address);
604 return (hip->gen_intrpt_stat);
605 }
606
607 static uint64_t
hpet_read_timer_N_config(hpet_info_t * hip,uint_t n)608 hpet_read_timer_N_config(hpet_info_t *hip, uint_t n)
609 {
610 uint64_t conf = *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
611 hip->logical_address, n);
612 hip->timer_n_config[n] = hpet_convert_timer_N_config(conf);
613 return (conf);
614 }
615
616 static hpet_TN_conf_cap_t
hpet_convert_timer_N_config(uint64_t conf)617 hpet_convert_timer_N_config(uint64_t conf)
618 {
619 hpet_TN_conf_cap_t cc = { 0 };
620
621 cc.int_route_cap = HPET_TIMER_N_INT_ROUTE_CAP(conf);
622 cc.fsb_int_del_cap = HPET_TIMER_N_FSB_INT_DEL_CAP(conf);
623 cc.fsb_int_en_cnf = HPET_TIMER_N_FSB_EN_CNF(conf);
624 cc.int_route_cnf = HPET_TIMER_N_INT_ROUTE_CNF(conf);
625 cc.mode32_cnf = HPET_TIMER_N_MODE32_CNF(conf);
626 cc.val_set_cnf = HPET_TIMER_N_VAL_SET_CNF(conf);
627 cc.size_cap = HPET_TIMER_N_SIZE_CAP(conf);
628 cc.per_int_cap = HPET_TIMER_N_PER_INT_CAP(conf);
629 cc.type_cnf = HPET_TIMER_N_TYPE_CNF(conf);
630 cc.int_enb_cnf = HPET_TIMER_N_INT_ENB_CNF(conf);
631 cc.int_type_cnf = HPET_TIMER_N_INT_TYPE_CNF(conf);
632
633 return (cc);
634 }
635
636 static uint64_t
hpet_read_main_counter_value(hpet_info_t * hip)637 hpet_read_main_counter_value(hpet_info_t *hip)
638 {
639 uint64_t value;
640 uint32_t *counter;
641 uint32_t high1, high2, low;
642
643 counter = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address);
644
645 /*
646 * 32-bit main counters
647 */
648 if (hip->gen_cap.count_size_cap == 0) {
649 value = (uint64_t)*counter;
650 hip->main_counter_value = value;
651 return (value);
652 }
653
654 /*
655 * HPET spec claims a 64-bit read can be split into two 32-bit reads
656 * by the hardware connection to the HPET.
657 */
658 high2 = counter[1];
659 do {
660 high1 = high2;
661 low = counter[0];
662 high2 = counter[1];
663 } while (high2 != high1);
664
665 value = ((uint64_t)high1 << 32) | low;
666 hip->main_counter_value = value;
667 return (value);
668 }
669
670 static void
hpet_write_gen_config(hpet_info_t * hip,uint64_t l)671 hpet_write_gen_config(hpet_info_t *hip, uint64_t l)
672 {
673 *(uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address) = l;
674 }
675
676 static void
hpet_write_gen_intrpt_stat(hpet_info_t * hip,uint64_t l)677 hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l)
678 {
679 *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(hip->logical_address) = l;
680 }
681
682 static void
hpet_write_timer_N_config(hpet_info_t * hip,uint_t n,uint64_t conf)683 hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t conf)
684 {
685 /*
686 * The configuration register size is not affected by the size
687 * capability; it is always a 64-bit value. The top 32-bit half of
688 * this register is always read-only so we constrain our write to the
689 * bottom half.
690 */
691 uint32_t *confaddr = (uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
692 hip->logical_address, n);
693 uint32_t conf32 = 0xFFFFFFFF & conf;
694
695 PRM_DEBUG(n);
696 PRM_DEBUG(conf);
697 PRM_DEBUG(conf32);
698
699 *confaddr = conf32;
700
701 PRM_POINT("write done");
702 }
703
704 static void
hpet_write_timer_N_comp(hpet_info_t * hip,uint_t n,uint64_t l)705 hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l)
706 {
707 *(uint64_t *)HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n) = l;
708 }
709
710 static void
hpet_disable_timer(hpet_info_t * hip,uint32_t timer_n)711 hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n)
712 {
713 uint64_t l;
714
715 l = hpet_read_timer_N_config(hip, timer_n);
716 l &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
717 hpet_write_timer_N_config(hip, timer_n, l);
718 }
719
720 static void
hpet_enable_timer(hpet_info_t * hip,uint32_t timer_n)721 hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n)
722 {
723 uint64_t l;
724
725 l = hpet_read_timer_N_config(hip, timer_n);
726 l |= HPET_TIMER_N_INT_ENB_CNF_BIT;
727 hpet_write_timer_N_config(hip, timer_n, l);
728 }
729
730 /*
731 * Add the interrupt handler for I/O APIC interrupt number (interrupt line).
732 *
733 * The I/O APIC line (vector) is programmed in ioapic_init_intr() called
734 * from apic_picinit() psm_ops apic_ops entry point after we return from
735 * apic_init() psm_ops entry point.
736 */
737 static uint32_t
hpet_install_interrupt_handler(avfunc func,int vector)738 hpet_install_interrupt_handler(avfunc func, int vector)
739 {
740 uint32_t retval;
741
742 retval = add_avintr(NULL, CBE_HIGH_PIL, func, "HPET Timer",
743 vector, NULL, NULL, NULL, NULL);
744 if (retval == 0) {
745 cmn_err(CE_WARN, "!hpet_acpi: add_avintr() failed");
746 return (AE_BAD_PARAMETER);
747 }
748 return (AE_OK);
749 }
750
751 /*
752 * The HPET timers specify which I/O APIC interrupts they can be routed to.
753 * Find the first available non-legacy-replacement timer and its I/O APIC irq.
754 * Supported I/O APIC IRQs are specified in the int_route_cap bitmap in each
755 * timer's timer_n_config register.
756 */
757 static int
hpet_get_IOAPIC_intr_capable_timer(hpet_info_t * hip)758 hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip)
759 {
760 int timer;
761 int intr;
762
763 for (timer = HPET_FIRST_NON_LEGACY_TIMER;
764 timer < hip->gen_cap.num_tim_cap; ++timer) {
765 if (!hpet_timer_available(hip->allocated_timers, timer))
766 continue;
767
768 intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1;
769
770 PRM_DEBUG(timer);
771 PRM_DEBUG(intr);
772
773 if (intr >= 0) {
774 hpet_timer_alloc(&hip->allocated_timers, timer);
775 hip->cstate_timer.timer = timer;
776 hip->cstate_timer.intr = intr;
777 return (timer);
778 }
779 }
780
781 return (-1);
782 }
783
/*
 * Mark timer n as used by setting bit n of *allocated_timers.
 *
 * n must be less than 32.  The shift is done on an unsigned 32-bit one so
 * that n == 31 is well defined (a signed `1 << 31' overflows int).
 */
static void
hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n)
{
	*allocated_timers |= (uint32_t)1 << n;
}
792
/*
 * Check if timer n is available, i.e. bit n of allocated_timers is clear.
 * Returns non-zero if available and 0 if already allocated.
 * No mutual exclusion because only one thread uses this.
 *
 * n must be less than 32.  The shift is done on an unsigned 32-bit one so
 * that n == 31 is well defined (a signed `1 << 31' overflows int).
 */
static int
hpet_timer_available(uint32_t allocated_timers, uint32_t n)
{
	return ((allocated_timers & ((uint32_t)1 << n)) == 0);
}
802
803 /*
804 * Setup timer N to route its interrupt to I/O APIC.
805 */
806 static void
hpet_timer_set_up(hpet_info_t * hip,uint32_t timer_n,uint32_t interrupt)807 hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
808 {
809 uint64_t conf;
810
811 PRM_DEBUG(timer_n);
812 PRM_DEBUG(interrupt);
813
814 PRM_POINT("hpet_read_timer_N_config()");
815 conf = hpet_read_timer_N_config(hip, timer_n);
816 PRM_DEBUG(conf);
817
818 /*
819 * Caller is required to verify this interrupt route is supported.
820 */
821 ASSERT(HPET_TIMER_N_INT_ROUTE_CAP(conf) & (1 << interrupt));
822
823 conf &= ~HPET_TIMER_N_FSB_EN_CNF_BIT; /* use IOAPIC */
824 conf |= HPET_TIMER_N_INT_ROUTE_SHIFT(interrupt);
825 conf &= ~HPET_TIMER_N_TYPE_CNF_BIT; /* non periodic */
826 conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT; /* disabled */
827 conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT; /* Level Triggered */
828
829 PRM_POINT("hpet_write_timer_N_config()");
830 PRM_DEBUG(conf);
831 hpet_write_timer_N_config(hip, timer_n, conf);
832 PRM_POINT("back from hpet_write_timer_N_config()");
833 }
834
835 /*
836 * The HPET's Main Counter is not stopped before programming an HPET timer.
837 * This will allow the HPET to be used as a time source.
838 * The programmed timer interrupt may occur before this function returns.
839 * Callers must block interrupts before calling this function if they must
840 * guarantee the interrupt is handled after this function returns.
841 *
842 * Return 0 if main counter is less than timer after enabling timer.
843 * The interrupt was programmed, but it may fire before this returns.
844 * Return !0 if main counter is greater than timer after enabling timer.
845 * In other words: the timer will not fire, and we do not know if it did fire.
846 *
847 * delta is in HPET ticks.
848 *
849 * Writing a 64-bit value to a 32-bit register will "wrap around".
850 * A 32-bit HPET timer will wrap around in a little over 5 minutes.
851 */
852 int
hpet_timer_program(hpet_info_t * hip,uint32_t timer,uint64_t delta)853 hpet_timer_program(hpet_info_t *hip, uint32_t timer, uint64_t delta)
854 {
855 uint64_t time, program;
856
857 program = hpet_read_main_counter_value(hip);
858 program += delta;
859 hpet_write_timer_N_comp(hip, timer, program);
860
861 time = hpet_read_main_counter_value(hip);
862 if (time < program)
863 return (AE_OK);
864
865 return (AE_TIME);
866 }
867
868 /*
869 * CPR and power policy-change callback entry point.
870 */
871 boolean_t
hpet_callback(int code)872 hpet_callback(int code)
873 {
874 switch (code) {
875 case PM_DEFAULT_CPU_DEEP_IDLE:
876 /*FALLTHROUGH*/
877 case PM_ENABLE_CPU_DEEP_IDLE:
878 /*FALLTHROUGH*/
879 case PM_DISABLE_CPU_DEEP_IDLE:
880 return (hpet_deep_idle_config(code));
881
882 case CB_CODE_CPR_RESUME:
883 /*FALLTHROUGH*/
884 case CB_CODE_CPR_CHKPT:
885 return (hpet_cpr(code));
886
887 case CST_EVENT_MULTIPLE_CSTATES:
888 hpet_cst_callback(CST_EVENT_MULTIPLE_CSTATES);
889 return (B_TRUE);
890
891 case CST_EVENT_ONE_CSTATE:
892 hpet_cst_callback(CST_EVENT_ONE_CSTATE);
893 return (B_TRUE);
894
895 default:
896 cmn_err(CE_NOTE, "!hpet_callback: invalid code %d\n", code);
897 return (B_FALSE);
898 }
899 }
900
901 /*
902 * According to the HPET spec 1.0a: the Operating System must save and restore
903 * HPET event timer hardware context through ACPI sleep state transitions.
904 * Timer registers (including the main counter) may not be preserved through
905 * ACPI S3, S4, or S5 sleep states. This code does not not support S1 nor S2.
906 *
907 * Current HPET state is already in hpet.supported and
908 * hpet_state.proxy_installed. hpet_info contains the proxy interrupt HPET
909 * Timer state.
910 *
911 * Future projects beware: the HPET Main Counter is undefined after ACPI S3 or
912 * S4, and it is not saved/restored here. Future projects cannot expect the
913 * Main Counter to be monotomically (or accurately) increasing across CPR.
914 *
915 * Note: the CPR Checkpoint path later calls pause_cpus() which ensures all
916 * CPUs are awake and in a spin loop before the system suspends. The HPET is
917 * not needed for Deep C-state wakeup when CPUs are in cpu_pause().
918 * It is safe to leave the HPET running as the system suspends; we just
919 * disable the timer from generating interrupts here.
920 */
921 static boolean_t
hpet_cpr(int code)922 hpet_cpr(int code)
923 {
924 ulong_t intr, dead_count = 0;
925 hrtime_t dead = gethrtime() + hpet_spin_timeout;
926 boolean_t ret = B_TRUE;
927
928 mutex_enter(&hpet_state_lock);
929 switch (code) {
930 case CB_CODE_CPR_CHKPT:
931 if (hpet_state.proxy_installed == B_FALSE)
932 break;
933
934 hpet_state.cpr = B_TRUE;
935
936 intr = intr_clear();
937 while (!mutex_tryenter(&hpet_proxy_lock)) {
938 /*
939 * spin
940 */
941 intr_restore(intr);
942 if (dead_count++ > hpet_spin_check) {
943 dead_count = 0;
944 if (gethrtime() > dead) {
945 hpet_state.cpr = B_FALSE;
946 mutex_exit(&hpet_state_lock);
947 cmn_err(CE_NOTE, "!hpet_cpr: deadman");
948 return (B_FALSE);
949 }
950 }
951 intr = intr_clear();
952 }
953 hpet_expire_all();
954 mutex_exit(&hpet_proxy_lock);
955 intr_restore(intr);
956
957 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
958 break;
959
960 case CB_CODE_CPR_RESUME:
961 if (hpet_resume() == B_TRUE)
962 hpet_state.cpr = B_FALSE;
963 else
964 cmn_err(CE_NOTE, "!hpet_resume failed.");
965 break;
966
967 default:
968 cmn_err(CE_NOTE, "!hpet_cpr: invalid code %d\n", code);
969 ret = B_FALSE;
970 break;
971 }
972 mutex_exit(&hpet_state_lock);
973 return (ret);
974 }
975
976 /*
977 * Assume the HPET stopped in Suspend state and timer state was lost.
978 */
979 static boolean_t
hpet_resume(void)980 hpet_resume(void)
981 {
982 if (hpet.supported != HPET_TIMER_SUPPORT)
983 return (B_TRUE);
984
985 /*
986 * The HPET spec does not specify if Legacy Replacement Route is
987 * on or off by default, so we set it off here.
988 */
989 (void) hpet_set_leg_rt_cnf(&hpet_info, 0);
990
991 if (hpet_start_main_counter(&hpet_info) != AE_OK) {
992 cmn_err(CE_NOTE, "!hpet_resume: start main counter failed");
993 hpet.supported = HPET_NO_SUPPORT;
994 if (hpet_state.proxy_installed == B_TRUE) {
995 hpet_state.proxy_installed = B_FALSE;
996 hpet_uninstall_interrupt_handler();
997 }
998 return (B_FALSE);
999 }
1000
1001 if (hpet_state.proxy_installed == B_FALSE)
1002 return (B_TRUE);
1003
1004 hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
1005 hpet_info.cstate_timer.intr);
1006 if (hpet_state.cpu_deep_idle == B_TRUE)
1007 hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
1008
1009 return (B_TRUE);
1010 }
1011
1012 /*
1013 * Callback to enable/disable Deep C-States based on power.conf setting.
1014 */
1015 static boolean_t
hpet_deep_idle_config(int code)1016 hpet_deep_idle_config(int code)
1017 {
1018 ulong_t intr, dead_count = 0;
1019 hrtime_t dead = gethrtime() + hpet_spin_timeout;
1020 boolean_t ret = B_TRUE;
1021
1022 mutex_enter(&hpet_state_lock);
1023 switch (code) {
1024 case PM_DEFAULT_CPU_DEEP_IDLE:
1025 /*FALLTHROUGH*/
1026 case PM_ENABLE_CPU_DEEP_IDLE:
1027
1028 if (hpet_state.cpu_deep_idle == B_TRUE)
1029 break;
1030
1031 if (hpet_state.proxy_installed == B_FALSE) {
1032 ret = B_FALSE; /* Deep C-States not supported */
1033 break;
1034 }
1035
1036 hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
1037 hpet_state.cpu_deep_idle = B_TRUE;
1038 break;
1039
1040 case PM_DISABLE_CPU_DEEP_IDLE:
1041
1042 if ((hpet_state.cpu_deep_idle == B_FALSE) ||
1043 (hpet_state.proxy_installed == B_FALSE))
1044 break;
1045
1046 /*
1047 * The order of these operations is important to avoid
1048 * lost wakeups: Set a flag to refuse all future LAPIC Timer
1049 * proxy requests, then wake up all CPUs from deep C-state,
1050 * and finally disable the HPET interrupt-generating timer.
1051 */
1052 hpet_state.cpu_deep_idle = B_FALSE;
1053
1054 intr = intr_clear();
1055 while (!mutex_tryenter(&hpet_proxy_lock)) {
1056 /*
1057 * spin
1058 */
1059 intr_restore(intr);
1060 if (dead_count++ > hpet_spin_check) {
1061 dead_count = 0;
1062 if (gethrtime() > dead) {
1063 hpet_state.cpu_deep_idle = B_TRUE;
1064 mutex_exit(&hpet_state_lock);
1065 cmn_err(CE_NOTE,
1066 "!hpet_deep_idle_config: deadman");
1067 return (B_FALSE);
1068 }
1069 }
1070 intr = intr_clear();
1071 }
1072 hpet_expire_all();
1073 mutex_exit(&hpet_proxy_lock);
1074 intr_restore(intr);
1075
1076 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
1077 break;
1078
1079 default:
1080 cmn_err(CE_NOTE, "!hpet_deep_idle_config: invalid code %d\n",
1081 code);
1082 ret = B_FALSE;
1083 break;
1084 }
1085 mutex_exit(&hpet_state_lock);
1086
1087 return (ret);
1088 }
1089
1090 /*
1091 * Callback for _CST c-state change notifications.
1092 */
1093 static void
hpet_cst_callback(uint32_t code)1094 hpet_cst_callback(uint32_t code)
1095 {
1096 ulong_t intr, dead_count = 0;
1097 hrtime_t dead = gethrtime() + hpet_spin_timeout;
1098
1099 switch (code) {
1100 case CST_EVENT_ONE_CSTATE:
1101 hpet_state.uni_cstate = B_TRUE;
1102 intr = intr_clear();
1103 while (!mutex_tryenter(&hpet_proxy_lock)) {
1104 /*
1105 * spin
1106 */
1107 intr_restore(intr);
1108 if (dead_count++ > hpet_spin_check) {
1109 dead_count = 0;
1110 if (gethrtime() > dead) {
1111 hpet_expire_all();
1112 cmn_err(CE_NOTE,
1113 "!hpet_cst_callback: deadman");
1114 return;
1115 }
1116 }
1117 intr = intr_clear();
1118 }
1119 hpet_expire_all();
1120 mutex_exit(&hpet_proxy_lock);
1121 intr_restore(intr);
1122 break;
1123
1124 case CST_EVENT_MULTIPLE_CSTATES:
1125 hpet_state.uni_cstate = B_FALSE;
1126 break;
1127
1128 default:
1129 cmn_err(CE_NOTE, "!hpet_cst_callback: invalid code %d\n", code);
1130 break;
1131 }
1132 }
1133
1134 /*
1135 * Interrupt Service Routine for HPET I/O-APIC-generated interrupts.
1136 * Used to wakeup CPUs from Deep C-state when their Local APIC Timer stops.
1137 * This ISR runs on one CPU which pokes other CPUs out of Deep C-state as
1138 * needed.
1139 */
1140 static uint_t
hpet_isr(caddr_t arg __unused,caddr_t arg1 __unused)1141 hpet_isr(caddr_t arg __unused, caddr_t arg1 __unused)
1142 {
1143 uint64_t timer_status;
1144 uint64_t timer_mask;
1145 ulong_t intr, dead_count = 0;
1146 hrtime_t dead = gethrtime() + hpet_isr_spin_timeout;
1147
1148 timer_mask = HPET_INTR_STATUS_MASK(hpet_info.cstate_timer.timer);
1149
1150 /*
1151 * We are using a level-triggered interrupt.
1152 * HPET sets timer's General Interrupt Status Register bit N.
1153 * ISR checks this bit to see if it needs servicing.
1154 * ISR then clears this bit by writing 1 to that bit.
1155 */
1156 timer_status = hpet_read_gen_intrpt_stat(&hpet_info);
1157 if (!(timer_status & timer_mask))
1158 return (DDI_INTR_UNCLAIMED);
1159 hpet_write_gen_intrpt_stat(&hpet_info, timer_mask);
1160
1161 /*
1162 * Do not touch ISR data structures before checking the HPET's General
1163 * Interrupt Status register. The General Interrupt Status register
1164 * will not be set by hardware until after timer interrupt generation
1165 * is enabled by software. Software allocates necessary data
1166 * structures before enabling timer interrupts. ASSERT the software
1167 * data structures required to handle this interrupt are initialized.
1168 */
1169 ASSERT(hpet_proxy_users != NULL);
1170
1171 /*
1172 * CPUs in deep c-states do not enable interrupts until after
1173 * performing idle cleanup which includes descheduling themselves from
1174 * the HPET. The CPU running this ISR will NEVER find itself in the
1175 * proxy list. A lost wakeup may occur if this is false.
1176 */
1177 ASSERT(hpet_proxy_users[CPU->cpu_id] == HPET_INFINITY);
1178
1179 /*
1180 * Higher level interrupts may deadlock with CPUs going idle if this
1181 * ISR is prempted while holding hpet_proxy_lock.
1182 */
1183 intr = intr_clear();
1184 while (!mutex_tryenter(&hpet_proxy_lock)) {
1185 /*
1186 * spin
1187 */
1188 intr_restore(intr);
1189 if (dead_count++ > hpet_spin_check) {
1190 dead_count = 0;
1191 if (gethrtime() > dead) {
1192 hpet_expire_all();
1193 return (DDI_INTR_CLAIMED);
1194 }
1195 }
1196 intr = intr_clear();
1197 }
1198 (void) hpet_guaranteed_schedule(HPET_INFINITY);
1199 mutex_exit(&hpet_proxy_lock);
1200 intr_restore(intr);
1201
1202 return (DDI_INTR_CLAIMED);
1203 }
1204
1205 /*
1206 * Used when disabling the HPET Timer interrupt. CPUs in Deep C-state must be
1207 * woken up because they can no longer rely on the HPET's Timer to wake them.
1208 * We do not need to wait for CPUs to wakeup.
1209 */
1210 static void
hpet_expire_all(void)1211 hpet_expire_all(void)
1212 {
1213 processorid_t id;
1214
1215 for (id = 0; id < max_ncpus; ++id) {
1216 if (hpet_proxy_users[id] != HPET_INFINITY) {
1217 hpet_proxy_users[id] = HPET_INFINITY;
1218 if (id != CPU->cpu_id)
1219 poke_cpu(id);
1220 }
1221 }
1222 }
1223
1224 /*
1225 * To avoid missed wakeups this function must guarantee either the HPET timer
1226 * was successfully programmed to the next expire time or there are no waiting
1227 * CPUs.
1228 *
1229 * Callers cannot enter C2 or deeper if the HPET could not be programmed to
1230 * generate its next interrupt to happen at required_wakeup_time or sooner.
1231 * Returns B_TRUE if the HPET was programmed to interrupt by
1232 * required_wakeup_time, B_FALSE if not.
1233 */
1234 static boolean_t
hpet_guaranteed_schedule(hrtime_t required_wakeup_time)1235 hpet_guaranteed_schedule(hrtime_t required_wakeup_time)
1236 {
1237 hrtime_t now, next_proxy_time;
1238 processorid_t id, next_proxy_id;
1239 int proxy_timer = hpet_info.cstate_timer.timer;
1240 boolean_t done = B_FALSE;
1241
1242 ASSERT(mutex_owned(&hpet_proxy_lock));
1243
1244 /*
1245 * Loop until we successfully program the HPET,
1246 * or no CPUs are scheduled to use the HPET as a proxy.
1247 */
1248 do {
1249 /*
1250 * Wake all CPUs that expired before now.
1251 * Find the next CPU to wake up and next HPET program time.
1252 */
1253 now = gethrtime();
1254 next_proxy_time = HPET_INFINITY;
1255 next_proxy_id = CPU->cpu_id;
1256 for (id = 0; id < max_ncpus; ++id) {
1257 if (hpet_proxy_users[id] < now) {
1258 hpet_proxy_users[id] = HPET_INFINITY;
1259 if (id != CPU->cpu_id)
1260 poke_cpu(id);
1261 } else if (hpet_proxy_users[id] < next_proxy_time) {
1262 next_proxy_time = hpet_proxy_users[id];
1263 next_proxy_id = id;
1264 }
1265 }
1266
1267 if (next_proxy_time == HPET_INFINITY) {
1268 done = B_TRUE;
1269 /*
1270 * There are currently no CPUs using the HPET's Timer
1271 * as a proxy for their LAPIC Timer. The HPET's Timer
1272 * does not need to be programmed.
1273 *
1274 * Letting the HPET timer wrap around to the current
1275 * time is the longest possible timeout.
1276 * A 64-bit timer will wrap around in ~ 2^44 seconds.
1277 * A 32-bit timer will wrap around in ~ 2^12 seconds.
1278 *
1279 * Disabling the HPET's timer interrupt requires a
1280 * (relatively expensive) write to the HPET.
1281 * Instead we do nothing.
1282 *
1283 * We are gambling some CPU will attempt to enter a
1284 * deep c-state before the timer wraps around.
1285 * We assume one spurious interrupt in a little over an
1286 * hour has less performance impact than writing to the
1287 * HPET's timer disable bit every time all CPUs wakeup
1288 * from deep c-state.
1289 */
1290
1291 } else {
1292 /*
1293 * Idle CPUs disable interrupts before programming the
1294 * HPET to prevent a lost wakeup if the HPET
1295 * interrupts the idle cpu before it can enter a
1296 * Deep C-State.
1297 */
1298 if (hpet_timer_program(&hpet_info, proxy_timer,
1299 HRTIME_TO_HPET_TICKS(next_proxy_time - gethrtime()))
1300 != AE_OK) {
1301 /*
1302 * We could not program the HPET to wakeup the
1303 * next CPU. We must wake the CPU ourself to
1304 * avoid a lost wakeup.
1305 */
1306 hpet_proxy_users[next_proxy_id] = HPET_INFINITY;
1307 if (next_proxy_id != CPU->cpu_id)
1308 poke_cpu(next_proxy_id);
1309 } else {
1310 done = B_TRUE;
1311 }
1312 }
1313
1314 } while (!done);
1315
1316 return (next_proxy_time <= required_wakeup_time);
1317 }
1318
1319 /*
1320 * Use an HPET timer to act as this CPU's proxy local APIC timer.
1321 * Used in deep c-states C2 and above while the CPU's local APIC timer stalls.
1322 * Called by the idle thread with interrupts enabled.
1323 * Always returns with interrupts disabled.
1324 *
1325 * There are 3 possible outcomes from this function:
1326 * 1. The Local APIC Timer was already disabled before this function was called.
1327 * LAPIC TIMER : disabled
1328 * HPET : not scheduled to wake this CPU
1329 * *lapic_expire : (hrtime_t)HPET_INFINITY
1330 * Returns : B_TRUE
1331 * 2. Successfully programmed the HPET to act as a LAPIC Timer proxy.
1332 * LAPIC TIMER : disabled
1333 * HPET : scheduled to wake this CPU
1334 * *lapic_expire : hrtime_t when LAPIC timer would have expired
1335 * Returns : B_TRUE
1336 * 3. Failed to programmed the HPET to act as a LAPIC Timer proxy.
1337 * LAPIC TIMER : enabled
1338 * HPET : not scheduled to wake this CPU
1339 * *lapic_expire : (hrtime_t)HPET_INFINITY
1340 * Returns : B_FALSE
1341 *
1342 * The idle thread cannot enter Deep C-State in case 3.
1343 * The idle thread must re-enable & re-program the LAPIC_TIMER in case 2.
1344 */
1345 static boolean_t
hpet_use_hpet_timer(hrtime_t * lapic_expire)1346 hpet_use_hpet_timer(hrtime_t *lapic_expire)
1347 {
1348 hrtime_t now, expire, dead;
1349 uint64_t lapic_count, dead_count;
1350 cpupart_t *cpu_part;
1351 processorid_t cpu_sid;
1352 processorid_t cpu_id = CPU->cpu_id;
1353 processorid_t id;
1354 boolean_t rslt;
1355 boolean_t hset_update;
1356
1357 cpu_part = CPU->cpu_part;
1358 cpu_sid = CPU->cpu_seqid;
1359
1360 ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1361
1362 /*
1363 * A critical section exists between when the HPET is programmed
1364 * to interrupt the CPU and when this CPU enters an idle state.
1365 * Interrupts must be blocked during that time to prevent lost
1366 * CBE wakeup interrupts from either LAPIC or HPET.
1367 *
1368 * Must block interrupts before acquiring hpet_proxy_lock to prevent
1369 * a deadlock with the ISR if the ISR runs on this CPU after the
1370 * idle thread acquires the mutex but before it clears interrupts.
1371 */
1372 ASSERT(!interrupts_enabled());
1373 lapic_count = apic_timer_stop_count_fn();
1374 now = gethrtime();
1375 dead = now + hpet_idle_spin_timeout;
1376 *lapic_expire = expire = now + lapic_count;
1377 if (lapic_count == (hrtime_t)-1) {
1378 /*
1379 * LAPIC timer is currently disabled.
1380 * Will not use the HPET as a LAPIC Timer proxy.
1381 */
1382 *lapic_expire = (hrtime_t)HPET_INFINITY;
1383 return (B_TRUE);
1384 }
1385
1386 /*
1387 * Serialize hpet_proxy data structure manipulation.
1388 */
1389 dead_count = 0;
1390 while (!mutex_tryenter(&hpet_proxy_lock)) {
1391 /*
1392 * spin
1393 */
1394 apic_timer_restart_fn(expire);
1395 sti();
1396 cli();
1397
1398 if (dead_count++ > hpet_spin_check) {
1399 dead_count = 0;
1400 hset_update = (((CPU->cpu_flags & CPU_OFFLINE) == 0) &&
1401 (ncpus > 1));
1402 if (hset_update &&
1403 !bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
1404 *lapic_expire = (hrtime_t)HPET_INFINITY;
1405 return (B_FALSE);
1406 }
1407 }
1408
1409 lapic_count = apic_timer_stop_count_fn();
1410 now = gethrtime();
1411 *lapic_expire = expire = now + lapic_count;
1412 if (lapic_count == (hrtime_t)-1) {
1413 /*
1414 * LAPIC timer is currently disabled.
1415 * Will not use the HPET as a LAPIC Timer proxy.
1416 */
1417 *lapic_expire = (hrtime_t)HPET_INFINITY;
1418 return (B_TRUE);
1419 }
1420 if (now > dead) {
1421 apic_timer_restart_fn(expire);
1422 *lapic_expire = (hrtime_t)HPET_INFINITY;
1423 return (B_FALSE);
1424 }
1425 }
1426
1427 if ((hpet_state.cpr == B_TRUE) ||
1428 (hpet_state.cpu_deep_idle == B_FALSE) ||
1429 (hpet_state.proxy_installed == B_FALSE) ||
1430 (hpet_state.uni_cstate == B_TRUE)) {
1431 mutex_exit(&hpet_proxy_lock);
1432 apic_timer_restart_fn(expire);
1433 *lapic_expire = (hrtime_t)HPET_INFINITY;
1434 return (B_FALSE);
1435 }
1436
1437 hpet_proxy_users[cpu_id] = expire;
1438
1439 /*
1440 * We are done if another cpu is scheduled on the HPET with an
1441 * expire time before us. The next HPET interrupt has been programmed
1442 * to fire before our expire time.
1443 */
1444 for (id = 0; id < max_ncpus; ++id) {
1445 if ((hpet_proxy_users[id] <= expire) && (id != cpu_id)) {
1446 mutex_exit(&hpet_proxy_lock);
1447 return (B_TRUE);
1448 }
1449 }
1450
1451 /*
1452 * We are the next lAPIC to expire.
1453 * Program the HPET with our expire time.
1454 */
1455 rslt = hpet_guaranteed_schedule(expire);
1456 mutex_exit(&hpet_proxy_lock);
1457
1458 if (rslt == B_FALSE) {
1459 apic_timer_restart_fn(expire);
1460 *lapic_expire = (hrtime_t)HPET_INFINITY;
1461 }
1462
1463 return (rslt);
1464 }
1465
1466 /*
1467 * Called by the idle thread when waking up from Deep C-state before enabling
1468 * interrupts. With an array data structure it is faster to always remove
1469 * ourself from the array without checking if the HPET ISR already removed.
1470 *
1471 * We use a lazy algorithm for removing CPUs from the HPET's schedule.
1472 * We do not reprogram the HPET here because this CPU has real work to do.
1473 * On a idle system the CPU was probably woken up by the HPET's ISR.
1474 * On a heavily loaded system CPUs are not going into Deep C-state.
1475 * On a moderately loaded system another CPU will usually enter Deep C-state
1476 * and reprogram the HPET before the HPET fires with our wakeup.
1477 */
1478 static void
hpet_use_lapic_timer(hrtime_t expire)1479 hpet_use_lapic_timer(hrtime_t expire)
1480 {
1481 processorid_t cpu_id = CPU->cpu_id;
1482
1483 ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1484 ASSERT(!interrupts_enabled());
1485
1486 hpet_proxy_users[cpu_id] = HPET_INFINITY;
1487
1488 /*
1489 * Do not enable a LAPIC Timer that was initially disabled.
1490 */
1491 if (expire != HPET_INFINITY)
1492 apic_timer_restart_fn(expire);
1493 }
1494
1495 /*
1496 * Initialize data structure to keep track of CPUs using HPET as a proxy for
1497 * their stalled local APIC timer. For now this is just an array.
1498 */
1499 static void
hpet_init_proxy_data(void)1500 hpet_init_proxy_data(void)
1501 {
1502 processorid_t id;
1503
1504 /*
1505 * Use max_ncpus for hot plug compliance.
1506 */
1507 hpet_proxy_users = kmem_zalloc(max_ncpus * sizeof (*hpet_proxy_users),
1508 KM_SLEEP);
1509
1510 /*
1511 * Unused entries always contain HPET_INFINITY.
1512 */
1513 for (id = 0; id < max_ncpus; ++id)
1514 hpet_proxy_users[id] = HPET_INFINITY;
1515 }
1516