xref: /illumos-gate/usr/src/uts/i86pc/io/hpet_acpi.c (revision 8ce3ffdfd4c1bd6be03a31b5019c67a6c920ca54)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/hpet_acpi.h>
27 #include <sys/hpet.h>
28 #include <sys/bitmap.h>
29 #include <sys/inttypes.h>
30 #include <sys/time.h>
31 #include <sys/sunddi.h>
32 #include <sys/ksynch.h>
33 #include <sys/apic.h>
34 #include <sys/callb.h>
35 #include <sys/clock.h>
36 #include <sys/archsystm.h>
37 #include <sys/cpupart.h>
38 
39 /*
40  * hpet_state_lock is used to synchronize disabling/enabling deep c-states
41  * and to synchronize suspend/resume.
42  */
43 static kmutex_t		hpet_state_lock;
44 static struct hpet_state {
45 	boolean_t	proxy_installed;	/* CBE proxy interrupt setup */
46 	boolean_t	cpr;			/* currently in CPR */
47 	boolean_t	cpu_deep_idle;		/* user enable/disable */
48 	boolean_t	uni_cstate;		/* disable if only one cstate */
49 } hpet_state = { B_FALSE, B_FALSE, B_TRUE, B_TRUE};
50 
51 uint64_t hpet_spin_check = HPET_SPIN_CHECK;
52 uint64_t hpet_spin_timeout = HPET_SPIN_TIMEOUT;
53 uint64_t hpet_idle_spin_timeout = HPET_SPIN_TIMEOUT;
54 uint64_t hpet_isr_spin_timeout = HPET_SPIN_TIMEOUT;
55 
56 static kmutex_t		hpet_proxy_lock;	/* lock for lAPIC proxy data */
57 /*
58  * hpet_proxy_users is a per-cpu array.
59  */
60 static hpet_proxy_t	*hpet_proxy_users;	/* one per CPU */
61 
62 
63 ACPI_TABLE_HPET		*hpet_table;		/* ACPI HPET table */
64 hpet_info_t		hpet_info;		/* Human readable Information */
65 
66 /*
67  * Provide HPET access from unix.so.
68  * Set up pointers to access symbols in pcplusmp.
69  */
70 static void
71 hpet_establish_hooks(void)
72 {
73 	hpet.install_proxy = &hpet_install_proxy;
74 	hpet.callback = &hpet_callback;
75 	hpet.use_hpet_timer = &hpet_use_hpet_timer;
76 	hpet.use_lapic_timer = &hpet_use_lapic_timer;
77 }
78 
79 /*
80  * Get the ACPI "HPET" table.
81  * acpi_probe() calls this function from mp_startup before drivers are loaded.
82  * acpi_probe() verified the system is using ACPI before calling this.
83  *
84  * There may be more than one ACPI HPET table (Itanium only?).
85  * Intel's HPET spec defines each timer block to have up to 32 counters and
86  * be 1024 bytes long.  There can be more than one timer block of 32 counters.
87  * Each timer block would have an additional ACPI HPET table.
88  * Typical x86 systems today only have 1 HPET with 3 counters.
89  * On x86 we only consume HPET table "1" for now.
90  */
91 int
92 hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
93 {
94 	extern hrtime_t tsc_read(void);
95 	extern int	idle_cpu_no_deep_c;
96 	extern int	cpuid_deep_cstates_supported(void);
97 	void		*la;
98 	uint64_t	ret;
99 	uint_t		num_timers;
100 	uint_t		ti;
101 
102 	(void) memset(&hpet_info, 0, sizeof (hpet_info));
103 	hpet.supported = HPET_NO_SUPPORT;
104 
105 	if (idle_cpu_no_deep_c)
106 		return (DDI_FAILURE);
107 
108 	if (!cpuid_deep_cstates_supported())
109 		return (DDI_FAILURE);
110 
111 	hpet_establish_hooks();
112 
113 	/*
114 	 * Get HPET ACPI table 1.
115 	 */
116 	if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1,
117 	    (ACPI_TABLE_HEADER **)&hpet_table))) {
118 		cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table");
119 		return (DDI_FAILURE);
120 	}
121 
122 	if (hpet_validate_table(hpet_table) != AE_OK) {
123 		cmn_err(CE_NOTE, "!hpet_acpi: invalid HPET table");
124 		return (DDI_FAILURE);
125 	}
126 
127 	la = hpet_memory_map(hpet_table);
128 	if (la == NULL) {
129 		cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed");
130 		return (DDI_FAILURE);
131 	}
132 	hpet_info.logical_address = la;
133 
134 	ret = hpet_read_gen_cap(&hpet_info);
135 	hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret);
136 	hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret);
137 	hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret);
138 	hpet_info.gen_cap.count_size_cap = HPET_GCAP_CNT_SIZE_CAP(ret);
139 	/*
140 	 * Hardware contains the last timer's number.
141 	 * Add 1 to get the number of timers.
142 	 */
143 	hpet_info.gen_cap.num_tim_cap = HPET_GCAP_NUM_TIM_CAP(ret) + 1;
144 	hpet_info.gen_cap.rev_id = HPET_GCAP_REV_ID(ret);
145 
146 	if (hpet_info.gen_cap.counter_clk_period > HPET_MAX_CLK_PERIOD) {
147 		cmn_err(CE_NOTE, "!hpet_acpi: COUNTER_CLK_PERIOD 0x%lx > 0x%lx",
148 		    (long)hpet_info.gen_cap.counter_clk_period,
149 		    (long)HPET_MAX_CLK_PERIOD);
150 		return (DDI_FAILURE);
151 	}
152 
153 	num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap;
154 	if ((num_timers < 3) || (num_timers > 32)) {
155 		cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers "
156 		    "%lx", (long)num_timers);
157 		return (DDI_FAILURE);
158 	}
159 	hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc(
160 	    num_timers * sizeof (uint64_t), KM_SLEEP);
161 
162 	ret = hpet_read_gen_config(&hpet_info);
163 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
164 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
165 
166 	/*
167 	 * Solaris does not use the HPET Legacy Replacement Route capabilities.
168 	 * This feature has been off by default on test systems.
169 	 * The HPET spec does not specify if Legacy Replacement Route is
170 	 * on or off by default, so we explicitely set it off here.
171 	 * It should not matter which mode the HPET is in since we use
172 	 * the first available non-legacy replacement timer: timer 2.
173 	 */
174 	(void) hpet_set_leg_rt_cnf(&hpet_info, 0);
175 
176 	ret = hpet_read_gen_config(&hpet_info);
177 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
178 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
179 
180 	hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info);
181 	hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info);
182 
183 	for (ti = 0; ti < num_timers; ++ti) {
184 		ret = hpet_read_timer_N_config(&hpet_info, ti);
185 		/*
186 		 * Make sure no timers are enabled (think fast reboot or
187 		 * virtual hardware).
188 		 */
189 		if (ret & HPET_TIMER_N_INT_ENB_CNF_BIT) {
190 			hpet_disable_timer(&hpet_info, ti);
191 			ret &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
192 		}
193 
194 		hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret);
195 	}
196 
197 	/*
198 	 * Be aware the Main Counter may need to be initialized in the future
199 	 * if it is used for more than just Deep C-State support.
200 	 * The HPET's Main Counter does not need to be initialize to a specific
201 	 * value before starting it for use to wake up CPUs from Deep C-States.
202 	 */
203 	if (hpet_start_main_counter(&hpet_info) != AE_OK) {
204 		cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed");
205 		return (DDI_FAILURE);
206 	}
207 
208 	hpet_info.period = hpet_info.gen_cap.counter_clk_period;
209 	/*
210 	 * Read main counter twice to record HPET latency for debugging.
211 	 */
212 	hpet_info.tsc[0] = tsc_read();
213 	hpet_info.hpet_main_counter_reads[0] =
214 	    hpet_read_main_counter_value(&hpet_info);
215 	hpet_info.tsc[1] = tsc_read();
216 	hpet_info.hpet_main_counter_reads[1] =
217 	    hpet_read_main_counter_value(&hpet_info);
218 	hpet_info.tsc[2] = tsc_read();
219 
220 	ret = hpet_read_gen_config(&hpet_info);
221 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
222 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
223 
224 	/*
225 	 * HPET main counter reads are supported now.
226 	 */
227 	hpet.supported = HPET_TIMER_SUPPORT;
228 
229 	return (hpet_init_proxy(hpet_vect, hpet_flags));
230 }
231 
232 void
233 hpet_acpi_fini(void)
234 {
235 	if (hpet.supported == HPET_NO_SUPPORT)
236 		return;
237 	if (hpet.supported >= HPET_TIMER_SUPPORT)
238 		(void) hpet_stop_main_counter(&hpet_info);
239 	if (hpet.supported > HPET_TIMER_SUPPORT)
240 		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
241 }
242 
243 /*
244  * Do initial setup to use a HPET timer as a proxy for Deep C-state stalled
245  * LAPIC Timers.  Get a free HPET timer that supports I/O APIC routed interrupt.
246  * Setup data to handle the timer's ISR, and add the timer's interrupt.
247  *
248  * The ddi cannot be use to allocate the HPET timer's interrupt.
249  * ioapic_init_intr() in mp_platform_common() later sets up the I/O APIC
250  * to handle the HPET timer's interrupt.
251  *
252  * Note: FSB (MSI) interrupts are not currently supported by Intel HPETs as of
253  * ICH9.  The HPET spec allows for MSI.  In the future MSI may be prefered.
254  */
255 static int
256 hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
257 {
258 	if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) {
259 		cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed.");
260 		return (DDI_FAILURE);
261 	}
262 
263 	hpet_init_proxy_data();
264 
265 	if (hpet_install_interrupt_handler(&hpet_isr,
266 	    hpet_info.cstate_timer.intr) != AE_OK) {
267 		cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed.");
268 		return (DDI_FAILURE);
269 	}
270 	*hpet_vect = hpet_info.cstate_timer.intr;
271 	hpet_flags->intr_el = INTR_EL_LEVEL;
272 	hpet_flags->intr_po = INTR_PO_ACTIVE_HIGH;
273 	hpet_flags->bustype = BUS_PCI;		/*  we *do* conform to PCI */
274 
275 	/*
276 	 * Avoid a possibly stuck interrupt by programing the HPET's timer here
277 	 * before the I/O APIC is programmed to handle this interrupt.
278 	 */
279 	hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
280 	    hpet_info.cstate_timer.intr);
281 
282 	/*
283 	 * All HPET functionality is supported.
284 	 */
285 	hpet.supported = HPET_FULL_SUPPORT;
286 	return (DDI_SUCCESS);
287 }
288 
289 /*
290  * Called by kernel if it can support Deep C-States.
291  */
292 static boolean_t
293 hpet_install_proxy(void)
294 {
295 	if (hpet_state.proxy_installed == B_TRUE)
296 		return (B_TRUE);
297 
298 	if (hpet.supported != HPET_FULL_SUPPORT)
299 		return (B_FALSE);
300 
301 	hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
302 	hpet_state.proxy_installed = B_TRUE;
303 
304 	return (B_TRUE);
305 }
306 
307 /*
308  * Remove the interrupt that was added with add_avintr() in
309  * hpet_install_interrupt_handler().
310  */
311 static void
312 hpet_uninstall_interrupt_handler(void)
313 {
314 	rem_avintr(NULL, CBE_HIGH_PIL, (avfunc)&hpet_isr,
315 	    hpet_info.cstate_timer.intr);
316 }
317 
318 static int
319 hpet_validate_table(ACPI_TABLE_HPET *hpet_table)
320 {
321 	ACPI_TABLE_HEADER	*table_header = (ACPI_TABLE_HEADER *)hpet_table;
322 
323 	if (table_header->Length != sizeof (ACPI_TABLE_HPET)) {
324 		cmn_err(CE_WARN, "!hpet_validate_table: Length %lx != sizeof ("
325 		    "ACPI_TABLE_HPET) %lx.",
326 		    (unsigned long)((ACPI_TABLE_HEADER *)hpet_table)->Length,
327 		    (unsigned long)sizeof (ACPI_TABLE_HPET));
328 		return (AE_ERROR);
329 	}
330 
331 	if (!ACPI_COMPARE_NAME(table_header->Signature, ACPI_SIG_HPET)) {
332 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET table "
333 		    "signature");
334 		return (AE_ERROR);
335 	}
336 
337 	if (!hpet_checksum_table((unsigned char *)hpet_table,
338 	    (unsigned int)table_header->Length)) {
339 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET checksum");
340 		return (AE_ERROR);
341 	}
342 
343 	/*
344 	 * Sequence should be table number - 1.  We are using table 1.
345 	 */
346 	if (hpet_table->Sequence != HPET_TABLE_1 - 1) {
347 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid Sequence %lx",
348 		    (long)hpet_table->Sequence);
349 		return (AE_ERROR);
350 	}
351 
352 	return (AE_OK);
353 }
354 
355 static boolean_t
356 hpet_checksum_table(unsigned char *table, unsigned int length)
357 {
358 	unsigned char	checksum = 0;
359 	int		i;
360 
361 	for (i = 0; i < length; ++i, ++table)
362 		checksum += *table;
363 
364 	return (checksum == 0);
365 }
366 
367 static void *
368 hpet_memory_map(ACPI_TABLE_HPET *hpet_table)
369 {
370 	return (AcpiOsMapMemory(hpet_table->Address.Address, HPET_SIZE));
371 }
372 
373 static int
374 hpet_start_main_counter(hpet_info_t *hip)
375 {
376 	uint64_t	*gcr_ptr;
377 	uint64_t	gcr;
378 
379 	gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
380 	gcr = *gcr_ptr;
381 
382 	gcr |= HPET_GCFR_ENABLE_CNF;
383 	*gcr_ptr = gcr;
384 	gcr = *gcr_ptr;
385 
386 	return (gcr & HPET_GCFR_ENABLE_CNF ? AE_OK : ~AE_OK);
387 }
388 
389 static int
390 hpet_stop_main_counter(hpet_info_t *hip)
391 {
392 	uint64_t	*gcr_ptr;
393 	uint64_t	gcr;
394 
395 	gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
396 	gcr = *gcr_ptr;
397 
398 	gcr &= ~HPET_GCFR_ENABLE_CNF;
399 	*gcr_ptr = gcr;
400 	gcr = *gcr_ptr;
401 
402 	return (gcr & HPET_GCFR_ENABLE_CNF ? ~AE_OK : AE_OK);
403 }
404 
405 /*
406  * Set the Legacy Replacement Route bit.
407  * This should be called before setting up timers.
408  * The HPET specification is silent regarding setting this after timers are
409  * programmed.
410  */
411 static uint64_t
412 hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value)
413 {
414 	uint64_t gen_conf = hpet_read_gen_config(hip);
415 
416 	switch (new_value) {
417 	case 0:
418 		gen_conf &= ~HPET_GCFR_LEG_RT_CNF;
419 		break;
420 
421 	case HPET_GCFR_LEG_RT_CNF:
422 		gen_conf |= HPET_GCFR_LEG_RT_CNF;
423 		break;
424 
425 	default:
426 		ASSERT(new_value == 0 || new_value == HPET_GCFR_LEG_RT_CNF);
427 		break;
428 	}
429 	hpet_write_gen_config(hip, gen_conf);
430 	return (gen_conf);
431 }
432 
433 static uint64_t
434 hpet_read_gen_cap(hpet_info_t *hip)
435 {
436 	return (*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address));
437 }
438 
439 static uint64_t
440 hpet_read_gen_config(hpet_info_t *hip)
441 {
442 	return (*(uint64_t *)
443 	    HPET_GEN_CONFIG_ADDRESS(hip->logical_address));
444 }
445 
446 static uint64_t
447 hpet_read_gen_intrpt_stat(hpet_info_t *hip)
448 {
449 	hip->gen_intrpt_stat = *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(
450 	    hip->logical_address);
451 	return (hip->gen_intrpt_stat);
452 }
453 
454 static uint64_t
455 hpet_read_timer_N_config(hpet_info_t *hip, uint_t n)
456 {
457 	uint64_t conf = *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
458 	    hip->logical_address, n);
459 	hip->timer_n_config[n] = hpet_convert_timer_N_config(conf);
460 	return (conf);
461 }
462 
463 static hpet_TN_conf_cap_t
464 hpet_convert_timer_N_config(uint64_t conf)
465 {
466 	hpet_TN_conf_cap_t cc = { 0 };
467 
468 	cc.int_route_cap = HPET_TIMER_N_INT_ROUTE_CAP(conf);
469 	cc.fsb_int_del_cap = HPET_TIMER_N_FSB_INT_DEL_CAP(conf);
470 	cc.fsb_int_en_cnf = HPET_TIMER_N_FSB_EN_CNF(conf);
471 	cc.int_route_cnf = HPET_TIMER_N_INT_ROUTE_CNF(conf);
472 	cc.mode32_cnf = HPET_TIMER_N_MODE32_CNF(conf);
473 	cc.val_set_cnf = HPET_TIMER_N_VAL_SET_CNF(conf);
474 	cc.size_cap = HPET_TIMER_N_SIZE_CAP(conf);
475 	cc.per_int_cap = HPET_TIMER_N_PER_INT_CAP(conf);
476 	cc.type_cnf = HPET_TIMER_N_TYPE_CNF(conf);
477 	cc.int_enb_cnf = HPET_TIMER_N_INT_ENB_CNF(conf);
478 	cc.int_type_cnf = HPET_TIMER_N_INT_TYPE_CNF(conf);
479 
480 	return (cc);
481 }
482 
483 static uint64_t
484 hpet_read_timer_N_comp(hpet_info_t *hip, uint_t n)
485 {
486 	if (hip->timer_n_config[n].size_cap == 1)
487 		return (*(uint64_t *)
488 		    HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n));
489 	else
490 		return (*(uint32_t *)
491 		    HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n));
492 }
493 
494 static uint64_t
495 hpet_read_main_counter_value(hpet_info_t *hip)
496 {
497 	uint64_t	value;
498 	uint32_t	*counter;
499 	uint32_t	high1, high2, low;
500 
501 	counter = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address);
502 
503 	/*
504 	 * 32-bit main counters
505 	 */
506 	if (hip->gen_cap.count_size_cap == 0) {
507 		value = (uint64_t)*counter;
508 		hip->main_counter_value = value;
509 		return (value);
510 	}
511 
512 	/*
513 	 * HPET spec claims a 64-bit read can be split into two 32-bit reads
514 	 * by the hardware connection to the HPET.
515 	 */
516 	high2 = counter[1];
517 	do {
518 		high1 = high2;
519 		low = counter[0];
520 		high2 = counter[1];
521 	} while (high2 != high1);
522 
523 	value = ((uint64_t)high1 << 32) | low;
524 	hip->main_counter_value = value;
525 	return (value);
526 }
527 
528 static void
529 hpet_write_gen_cap(hpet_info_t *hip, uint64_t l)
530 {
531 	*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address) = l;
532 }
533 
534 static void
535 hpet_write_gen_config(hpet_info_t *hip, uint64_t l)
536 {
537 	*(uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address) = l;
538 }
539 
540 static void
541 hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l)
542 {
543 	*(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(hip->logical_address) = l;
544 }
545 
546 static void
547 hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l)
548 {
549 	if (hip->timer_n_config[n].size_cap == 1)
550 		*(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
551 		    hip->logical_address, n) = l;
552 	else
553 		*(uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
554 		    hip->logical_address, n) = (uint32_t)(0xFFFFFFFF & l);
555 }
556 
557 static void
558 hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l)
559 {
560 	*(uint64_t *)HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n) = l;
561 }
562 
563 static void
564 hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n)
565 {
566 	uint64_t l;
567 
568 	l = hpet_read_timer_N_config(hip, timer_n);
569 	l &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
570 	hpet_write_timer_N_config(hip, timer_n, l);
571 }
572 
573 static void
574 hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n)
575 {
576 	uint64_t l;
577 
578 	l = hpet_read_timer_N_config(hip, timer_n);
579 	l |= HPET_TIMER_N_INT_ENB_CNF_BIT;
580 	hpet_write_timer_N_config(hip, timer_n, l);
581 }
582 
583 static void
584 hpet_write_main_counter_value(hpet_info_t *hip, uint64_t l)
585 {
586 	uint32_t	*address;
587 
588 	/*
589 	 * HPET spec 1.0a states main counter register should be halted before
590 	 * it is written to.
591 	 */
592 	ASSERT(!(hpet_read_gen_config(hip) & HPET_GCFR_ENABLE_CNF));
593 
594 	if (hip->gen_cap.count_size_cap == 1) {
595 		*(uint64_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address)
596 		    = l;
597 	} else {
598 		address = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(
599 		    hip->logical_address);
600 
601 		address[0] = (uint32_t)(l & 0xFFFFFFFF);
602 	}
603 }
604 
605 /*
606  * Add the interrupt handler for I/O APIC interrupt number (interrupt line).
607  *
608  * The I/O APIC line (vector) is programmed in ioapic_init_intr() called
609  * from apic_picinit() psm_ops apic_ops entry point after we return from
610  * apic_init() psm_ops entry point.
611  */
612 static uint32_t
613 hpet_install_interrupt_handler(uint_t (*func)(char *), int vector)
614 {
615 	uint32_t retval;
616 
617 	retval = add_avintr(NULL, CBE_HIGH_PIL, (avfunc)func, "HPET Timer",
618 	    vector, NULL, NULL, NULL, NULL);
619 	if (retval == 0) {
620 		cmn_err(CE_WARN, "!hpet_acpi: add_avintr() failed");
621 		return (AE_BAD_PARAMETER);
622 	}
623 	return (AE_OK);
624 }
625 
626 /*
627  * The HPET timers specify which I/O APIC interrupts they can be routed to.
628  * Find the first available non-legacy-replacement timer and its I/O APIC irq.
629  * Supported I/O APIC IRQs are specified in the int_route_cap bitmap in each
630  * timer's timer_n_config register.
631  */
632 static int
633 hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip)
634 {
635 	int	timer;
636 	int	intr;
637 
638 	for (timer = HPET_FIRST_NON_LEGACY_TIMER;
639 	    timer < hip->gen_cap.num_tim_cap; ++timer) {
640 
641 		if (!hpet_timer_available(hip->allocated_timers, timer))
642 			continue;
643 
644 		intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1;
645 		if (intr >= 0) {
646 			hpet_timer_alloc(&hip->allocated_timers, timer);
647 			hip->cstate_timer.timer = timer;
648 			hip->cstate_timer.intr = intr;
649 			return (timer);
650 		}
651 	}
652 
653 	return (-1);
654 }
655 
/*
 * Mark timer n as used by setting bit n of *allocated_timers.
 * n must be less than 32.  The shift is done on an unsigned 32-bit value so
 * that n == 31 is well defined.
 */
static void
hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n)
{
	*allocated_timers |= (uint32_t)1 << n;
}
664 
/*
 * Check if timer n is available, i.e. bit n of allocated_timers is clear.
 * Returns 1 when the timer is free and 0 when it is already allocated.
 * No mutual exclusion because only one thread uses this.
 * n must be less than 32.  The shift is done on an unsigned 32-bit value so
 * that n == 31 is well defined.
 */
static int
hpet_timer_available(uint32_t allocated_timers, uint32_t n)
{
	return ((allocated_timers & ((uint32_t)1 << n)) == 0);
}
674 
675 /*
676  * Setup timer N to route its interrupt to I/O APIC.
677  */
678 static void
679 hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
680 {
681 	uint64_t conf;
682 
683 	conf = hpet_read_timer_N_config(hip, timer_n);
684 
685 	/*
686 	 * Caller is required to verify this interrupt route is supported.
687 	 */
688 	ASSERT(HPET_TIMER_N_INT_ROUTE_CAP(conf) & (1 << interrupt));
689 
690 	conf &= ~HPET_TIMER_N_FSB_EN_CNF_BIT;	/* use IOAPIC */
691 	conf |= HPET_TIMER_N_INT_ROUTE_SHIFT(interrupt);
692 	conf &= ~HPET_TIMER_N_TYPE_CNF_BIT;	/* non periodic */
693 	conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;	/* disabled */
694 	conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT;	/* Level Triggered */
695 
696 	hpet_write_timer_N_config(hip, timer_n, conf);
697 }
698 
699 /*
700  * The HPET's Main Counter is not stopped before programming an HPET timer.
701  * This will allow the HPET to be used as a time source.
702  * The programmed timer interrupt may occur before this function returns.
703  * Callers must block interrupts before calling this function if they must
704  * guarantee the interrupt is handled after this function returns.
705  *
706  * Return 0 if main counter is less than timer after enabling timer.
707  * The interrupt was programmed, but it may fire before this returns.
708  * Return !0 if main counter is greater than timer after enabling timer.
709  * In other words: the timer will not fire, and we do not know if it did fire.
710  *
711  * delta is in HPET ticks.
712  *
713  * Writing a 64-bit value to a 32-bit register will "wrap around".
714  * A 32-bit HPET timer will wrap around in a little over 5 minutes.
715  */
716 int
717 hpet_timer_program(hpet_info_t *hip, uint32_t timer, uint64_t delta)
718 {
719 	uint64_t time, program;
720 
721 	program = hpet_read_main_counter_value(hip);
722 	program += delta;
723 	hpet_write_timer_N_comp(hip, timer, program);
724 
725 	time = hpet_read_main_counter_value(hip);
726 	if (time < program)
727 		return (AE_OK);
728 
729 	return (AE_TIME);
730 }
731 
732 /*
733  * CPR and power policy-change callback entry point.
734  */
735 boolean_t
736 hpet_callback(int code)
737 {
738 	switch (code) {
739 	case PM_DEFAULT_CPU_DEEP_IDLE:
740 		/*FALLTHROUGH*/
741 	case PM_ENABLE_CPU_DEEP_IDLE:
742 		/*FALLTHROUGH*/
743 	case PM_DISABLE_CPU_DEEP_IDLE:
744 		return (hpet_deep_idle_config(code));
745 
746 	case CB_CODE_CPR_RESUME:
747 		/*FALLTHROUGH*/
748 	case CB_CODE_CPR_CHKPT:
749 		return (hpet_cpr(code));
750 
751 	case CST_EVENT_MULTIPLE_CSTATES:
752 		hpet_cst_callback(CST_EVENT_MULTIPLE_CSTATES);
753 		return (B_TRUE);
754 
755 	case CST_EVENT_ONE_CSTATE:
756 		hpet_cst_callback(CST_EVENT_ONE_CSTATE);
757 		return (B_TRUE);
758 
759 	default:
760 		cmn_err(CE_NOTE, "!hpet_callback: invalid code %d\n", code);
761 		return (B_FALSE);
762 	}
763 }
764 
765 /*
766  * According to the HPET spec 1.0a: the Operating System must save and restore
767  * HPET event timer hardware context through ACPI sleep state transitions.
768  * Timer registers (including the main counter) may not be preserved through
769  * ACPI S3, S4, or S5 sleep states.  This code does not not support S1 nor S2.
770  *
771  * Current HPET state is already in hpet.supported and
772  * hpet_state.proxy_installed.  hpet_info contains the proxy interrupt HPET
773  * Timer state.
774  *
775  * Future projects beware: the HPET Main Counter is undefined after ACPI S3 or
776  * S4, and it is not saved/restored here.  Future projects cannot expect the
777  * Main Counter to be monotomically (or accurately) increasing across CPR.
778  *
779  * Note: the CPR Checkpoint path later calls pause_cpus() which ensures all
780  * CPUs are awake and in a spin loop before the system suspends.  The HPET is
781  * not needed for Deep C-state wakeup when CPUs are in cpu_pause().
782  * It is safe to leave the HPET running as the system suspends; we just
783  * disable the timer from generating interrupts here.
784  */
785 static boolean_t
786 hpet_cpr(int code)
787 {
788 	ulong_t		intr, dead_count = 0;
789 	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
790 	boolean_t	ret = B_TRUE;
791 
792 	mutex_enter(&hpet_state_lock);
793 	switch (code) {
794 	case CB_CODE_CPR_CHKPT:
795 		if (hpet_state.proxy_installed == B_FALSE)
796 			break;
797 
798 		hpet_state.cpr = B_TRUE;
799 
800 		intr = intr_clear();
801 		while (!mutex_tryenter(&hpet_proxy_lock)) {
802 			/*
803 			 * spin
804 			 */
805 			intr_restore(intr);
806 			if (dead_count++ > hpet_spin_check) {
807 				dead_count = 0;
808 				if (gethrtime() > dead) {
809 					hpet_state.cpr = B_FALSE;
810 					mutex_exit(&hpet_state_lock);
811 					cmn_err(CE_NOTE, "!hpet_cpr: deadman");
812 					return (B_FALSE);
813 				}
814 			}
815 			intr = intr_clear();
816 		}
817 		hpet_expire_all();
818 		mutex_exit(&hpet_proxy_lock);
819 		intr_restore(intr);
820 
821 		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
822 		break;
823 
824 	case CB_CODE_CPR_RESUME:
825 		if (hpet_resume() == B_TRUE)
826 			hpet_state.cpr = B_FALSE;
827 		else
828 			cmn_err(CE_NOTE, "!hpet_resume failed.");
829 		break;
830 
831 	default:
832 		cmn_err(CE_NOTE, "!hpet_cpr: invalid code %d\n", code);
833 		ret = B_FALSE;
834 		break;
835 	}
836 	mutex_exit(&hpet_state_lock);
837 	return (ret);
838 }
839 
840 /*
841  * Assume the HPET stopped in Suspend state and timer state was lost.
842  */
843 static boolean_t
844 hpet_resume(void)
845 {
846 	if (hpet.supported != HPET_TIMER_SUPPORT)
847 		return (B_TRUE);
848 
849 	/*
850 	 * The HPET spec does not specify if Legacy Replacement Route is
851 	 * on or off by default, so we set it off here.
852 	 */
853 	(void) hpet_set_leg_rt_cnf(&hpet_info, 0);
854 
855 	if (hpet_start_main_counter(&hpet_info) != AE_OK) {
856 		cmn_err(CE_NOTE, "!hpet_resume: start main counter failed");
857 		hpet.supported = HPET_NO_SUPPORT;
858 		if (hpet_state.proxy_installed == B_TRUE) {
859 			hpet_state.proxy_installed = B_FALSE;
860 			hpet_uninstall_interrupt_handler();
861 		}
862 		return (B_FALSE);
863 	}
864 
865 	if (hpet_state.proxy_installed == B_FALSE)
866 		return (B_TRUE);
867 
868 	hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
869 	    hpet_info.cstate_timer.intr);
870 	if (hpet_state.cpu_deep_idle == B_TRUE)
871 		hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
872 
873 	return (B_TRUE);
874 }
875 
876 /*
877  * Callback to enable/disable Deep C-States based on power.conf setting.
878  */
879 static boolean_t
880 hpet_deep_idle_config(int code)
881 {
882 	ulong_t		intr, dead_count = 0;
883 	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
884 	boolean_t	ret = B_TRUE;
885 
886 	mutex_enter(&hpet_state_lock);
887 	switch (code) {
888 	case PM_DEFAULT_CPU_DEEP_IDLE:
889 		/*FALLTHROUGH*/
890 	case PM_ENABLE_CPU_DEEP_IDLE:
891 
892 		if (hpet_state.cpu_deep_idle == B_TRUE)
893 			break;
894 
895 		if (hpet_state.proxy_installed == B_FALSE) {
896 			ret = B_FALSE;  /* Deep C-States not supported */
897 			break;
898 		}
899 
900 		hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
901 		hpet_state.cpu_deep_idle = B_TRUE;
902 		break;
903 
904 	case PM_DISABLE_CPU_DEEP_IDLE:
905 
906 		if ((hpet_state.cpu_deep_idle == B_FALSE) ||
907 		    (hpet_state.proxy_installed == B_FALSE))
908 			break;
909 
910 		/*
911 		 * The order of these operations is important to avoid
912 		 * lost wakeups: Set a flag to refuse all future LAPIC Timer
913 		 * proxy requests, then wake up all CPUs from deep C-state,
914 		 * and finally disable the HPET interrupt-generating timer.
915 		 */
916 		hpet_state.cpu_deep_idle = B_FALSE;
917 
918 		intr = intr_clear();
919 		while (!mutex_tryenter(&hpet_proxy_lock)) {
920 			/*
921 			 * spin
922 			 */
923 			intr_restore(intr);
924 			if (dead_count++ > hpet_spin_check) {
925 				dead_count = 0;
926 				if (gethrtime() > dead) {
927 					hpet_state.cpu_deep_idle = B_TRUE;
928 					mutex_exit(&hpet_state_lock);
929 					cmn_err(CE_NOTE,
930 					    "!hpet_deep_idle_config: deadman");
931 					return (B_FALSE);
932 				}
933 			}
934 			intr = intr_clear();
935 		}
936 		hpet_expire_all();
937 		mutex_exit(&hpet_proxy_lock);
938 		intr_restore(intr);
939 
940 		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
941 		break;
942 
943 	default:
944 		cmn_err(CE_NOTE, "!hpet_deep_idle_config: invalid code %d\n",
945 		    code);
946 		ret = B_FALSE;
947 		break;
948 	}
949 	mutex_exit(&hpet_state_lock);
950 
951 	return (ret);
952 }
953 
954 /*
955  * Callback for _CST c-state change notifications.
956  */
957 static void
958 hpet_cst_callback(uint32_t code)
959 {
960 	ulong_t		intr, dead_count = 0;
961 	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
962 
963 	switch (code) {
964 	case CST_EVENT_ONE_CSTATE:
965 		hpet_state.uni_cstate = B_TRUE;
966 		intr = intr_clear();
967 		while (!mutex_tryenter(&hpet_proxy_lock)) {
968 			/*
969 			 * spin
970 			 */
971 			intr_restore(intr);
972 			if (dead_count++ > hpet_spin_check) {
973 				dead_count = 0;
974 				if (gethrtime() > dead) {
975 					hpet_expire_all();
976 					cmn_err(CE_NOTE,
977 					    "!hpet_cst_callback: deadman");
978 					return;
979 				}
980 			}
981 			intr = intr_clear();
982 		}
983 		hpet_expire_all();
984 		mutex_exit(&hpet_proxy_lock);
985 		intr_restore(intr);
986 		break;
987 
988 	case CST_EVENT_MULTIPLE_CSTATES:
989 		hpet_state.uni_cstate = B_FALSE;
990 		break;
991 
992 	default:
993 		cmn_err(CE_NOTE, "!hpet_cst_callback: invalid code %d\n", code);
994 		break;
995 	}
996 }
997 
998 /*
999  * Interrupt Service Routine for HPET I/O-APIC-generated interrupts.
1000  * Used to wakeup CPUs from Deep C-state when their Local APIC Timer stops.
1001  * This ISR runs on one CPU which pokes other CPUs out of Deep C-state as
1002  * needed.
1003  */
1004 /* ARGSUSED */
1005 static uint_t
1006 hpet_isr(char *arg)
1007 {
1008 	uint64_t	timer_status;
1009 	uint64_t	timer_mask;
1010 	ulong_t		intr, dead_count = 0;
1011 	hrtime_t	dead = gethrtime() + hpet_isr_spin_timeout;
1012 
1013 	timer_mask = HPET_INTR_STATUS_MASK(hpet_info.cstate_timer.timer);
1014 
1015 	/*
1016 	 * We are using a level-triggered interrupt.
1017 	 * HPET sets timer's General Interrupt Status Register bit N.
1018 	 * ISR checks this bit to see if it needs servicing.
1019 	 * ISR then clears this bit by writing 1 to that bit.
1020 	 */
1021 	timer_status = hpet_read_gen_intrpt_stat(&hpet_info);
1022 	if (!(timer_status & timer_mask))
1023 		return (DDI_INTR_UNCLAIMED);
1024 	hpet_write_gen_intrpt_stat(&hpet_info, timer_mask);
1025 
1026 	/*
1027 	 * Do not touch ISR data structures before checking the HPET's General
1028 	 * Interrupt Status register.  The General Interrupt Status register
1029 	 * will not be set by hardware until after timer interrupt generation
1030 	 * is enabled by software.  Software allocates necessary data
1031 	 * structures before enabling timer interrupts.  ASSERT the software
1032 	 * data structures required to handle this interrupt are initialized.
1033 	 */
1034 	ASSERT(hpet_proxy_users != NULL);
1035 
1036 	/*
1037 	 * CPUs in deep c-states do not enable interrupts until after
1038 	 * performing idle cleanup which includes descheduling themselves from
1039 	 * the HPET.  The CPU running this ISR will NEVER find itself in the
1040 	 * proxy list.  A lost wakeup may occur if this is false.
1041 	 */
1042 	ASSERT(hpet_proxy_users[CPU->cpu_id] == HPET_INFINITY);
1043 
1044 	/*
1045 	 * Higher level interrupts may deadlock with CPUs going idle if this
1046 	 * ISR is prempted while holding hpet_proxy_lock.
1047 	 */
1048 	intr = intr_clear();
1049 	while (!mutex_tryenter(&hpet_proxy_lock)) {
1050 		/*
1051 		 * spin
1052 		 */
1053 		intr_restore(intr);
1054 		if (dead_count++ > hpet_spin_check) {
1055 			dead_count = 0;
1056 			if (gethrtime() > dead) {
1057 				hpet_expire_all();
1058 				return (DDI_INTR_CLAIMED);
1059 			}
1060 		}
1061 		intr = intr_clear();
1062 	}
1063 	(void) hpet_guaranteed_schedule(HPET_INFINITY);
1064 	mutex_exit(&hpet_proxy_lock);
1065 	intr_restore(intr);
1066 
1067 	return (DDI_INTR_CLAIMED);
1068 }
1069 
1070 /*
1071  * Used when disabling the HPET Timer interrupt.  CPUs in Deep C-state must be
1072  * woken up because they can no longer rely on the HPET's Timer to wake them.
1073  * We do not need to wait for CPUs to wakeup.
1074  */
1075 static void
1076 hpet_expire_all(void)
1077 {
1078 	processorid_t	id;
1079 
1080 	for (id = 0; id < ncpus; ++id) {
1081 		if (hpet_proxy_users[id] != HPET_INFINITY) {
1082 			hpet_proxy_users[id] = HPET_INFINITY;
1083 			if (id != CPU->cpu_id)
1084 				poke_cpu(id);
1085 		}
1086 	}
1087 }
1088 
1089 /*
1090  * To avoid missed wakeups this function must guarantee either the HPET timer
1091  * was successfully programmed to the next expire time or there are no waiting
1092  * CPUs.
1093  *
1094  * Callers cannot enter C2 or deeper if the HPET could not be programmed to
1095  * generate its next interrupt to happen at required_wakeup_time or sooner.
1096  * Returns B_TRUE if the HPET was programmed to interrupt by
1097  * required_wakeup_time, B_FALSE if not.
1098  */
1099 static boolean_t
1100 hpet_guaranteed_schedule(hrtime_t required_wakeup_time)
1101 {
1102 	hrtime_t	now, next_proxy_time;
1103 	processorid_t	id, next_proxy_id;
1104 	int		proxy_timer = hpet_info.cstate_timer.timer;
1105 	boolean_t	done = B_FALSE;
1106 
1107 	ASSERT(mutex_owned(&hpet_proxy_lock));
1108 
1109 	/*
1110 	 * Loop until we successfully program the HPET,
1111 	 * or no CPUs are scheduled to use the HPET as a proxy.
1112 	 */
1113 	do {
1114 		/*
1115 		 * Wake all CPUs that expired before now.
1116 		 * Find the next CPU to wake up and next HPET program time.
1117 		 */
1118 		now = gethrtime();
1119 		next_proxy_time = HPET_INFINITY;
1120 		next_proxy_id = CPU->cpu_id;
1121 		for (id = 0; id < ncpus; ++id) {
1122 			if (hpet_proxy_users[id] < now) {
1123 				hpet_proxy_users[id] = HPET_INFINITY;
1124 				if (id != CPU->cpu_id)
1125 					poke_cpu(id);
1126 			} else if (hpet_proxy_users[id] < next_proxy_time) {
1127 				next_proxy_time = hpet_proxy_users[id];
1128 				next_proxy_id = id;
1129 			}
1130 		}
1131 
1132 		if (next_proxy_time == HPET_INFINITY) {
1133 			done = B_TRUE;
1134 			/*
1135 			 * There are currently no CPUs using the HPET's Timer
1136 			 * as a proxy for their LAPIC Timer.  The HPET's Timer
1137 			 * does not need to be programmed.
1138 			 *
1139 			 * Letting the HPET timer wrap around to the current
1140 			 * time is the longest possible timeout.
1141 			 * A 64-bit timer will wrap around in ~ 2^44 seconds.
1142 			 * A 32-bit timer will wrap around in ~ 2^12 seconds.
1143 			 *
1144 			 * Disabling the HPET's timer interrupt requires a
1145 			 * (relatively expensive) write to the HPET.
1146 			 * Instead we do nothing.
1147 			 *
1148 			 * We are gambling some CPU will attempt to enter a
1149 			 * deep c-state before the timer wraps around.
1150 			 * We assume one spurious interrupt in a little over an
1151 			 * hour has less performance impact than writing to the
1152 			 * HPET's timer disable bit every time all CPUs wakeup
1153 			 * from deep c-state.
1154 			 */
1155 
1156 		} else {
1157 			/*
1158 			 * Idle CPUs disable interrupts before programming the
1159 			 * HPET to prevent a lost wakeup if the HPET
1160 			 * interrupts the idle cpu before it can enter a
1161 			 * Deep C-State.
1162 			 */
1163 			if (hpet_timer_program(&hpet_info, proxy_timer,
1164 			    HRTIME_TO_HPET_TICKS(next_proxy_time - gethrtime()))
1165 			    != AE_OK) {
1166 				/*
1167 				 * We could not program the HPET to wakeup the
1168 				 * next CPU.  We must wake the CPU ourself to
1169 				 * avoid a lost wakeup.
1170 				 */
1171 				hpet_proxy_users[next_proxy_id] = HPET_INFINITY;
1172 				if (next_proxy_id != CPU->cpu_id)
1173 					poke_cpu(next_proxy_id);
1174 			} else {
1175 				done = B_TRUE;
1176 			}
1177 		}
1178 
1179 	} while (!done);
1180 
1181 	return (next_proxy_time <= required_wakeup_time);
1182 }
1183 
1184 /*
1185  * Use an HPET timer to act as this CPU's proxy local APIC timer.
1186  * Used in deep c-states C2 and above while the CPU's local APIC timer stalls.
1187  * Called by the idle thread with interrupts enabled.
1188  * Always returns with interrupts disabled.
1189  *
1190  * There are 3 possible outcomes from this function:
1191  * 1. The Local APIC Timer was already disabled before this function was called.
1192  *	LAPIC TIMER	: disabled
1193  *	HPET		: not scheduled to wake this CPU
1194  *	*lapic_expire	: (hrtime_t)HPET_INFINITY
1195  *	Returns		: B_TRUE
1196  * 2. Successfully programmed the HPET to act as a LAPIC Timer proxy.
1197  *	LAPIC TIMER	: disabled
1198  *	HPET		: scheduled to wake this CPU
1199  *	*lapic_expire	: hrtime_t when LAPIC timer would have expired
1200  *	Returns		: B_TRUE
1201  * 3. Failed to programmed the HPET to act as a LAPIC Timer proxy.
1202  *	LAPIC TIMER	: enabled
1203  *	HPET		: not scheduled to wake this CPU
1204  *	*lapic_expire	: (hrtime_t)HPET_INFINITY
1205  *	Returns		: B_FALSE
1206  *
1207  * The idle thread cannot enter Deep C-State in case 3.
1208  * The idle thread must re-enable & re-program the LAPIC_TIMER in case 2.
1209  */
1210 static boolean_t
1211 hpet_use_hpet_timer(hrtime_t *lapic_expire)
1212 {
1213 	extern hrtime_t	apic_timer_stop_count(void);
1214 	extern void	apic_timer_restart(hrtime_t);
1215 	hrtime_t	now, expire, dead;
1216 	uint64_t	lapic_count, dead_count;
1217 	cpupart_t	*cpu_part;
1218 	processorid_t	cpu_sid;
1219 	processorid_t	cpu_id = CPU->cpu_id;
1220 	processorid_t	id;
1221 	boolean_t	rslt;
1222 	boolean_t	hset_update;
1223 
1224 	cpu_part = CPU->cpu_part;
1225 	cpu_sid = CPU->cpu_seqid;
1226 
1227 	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1228 	ASSERT(interrupts_enabled());
1229 
1230 	/*
1231 	 * A critical section exists between when the HPET is programmed
1232 	 * to interrupt the CPU and when this CPU enters an idle state.
1233 	 * Interrupts must be blocked during that time to prevent lost
1234 	 * CBE wakeup interrupts from either LAPIC or HPET.
1235 	 *
1236 	 * Must block interrupts before acquiring hpet_proxy_lock to prevent
1237 	 * a deadlock with the ISR if the ISR runs on this CPU after the
1238 	 * idle thread acquires the mutex but before it clears interrupts.
1239 	 */
1240 	cli();
1241 
1242 	lapic_count = apic_timer_stop_count();
1243 	now = gethrtime();
1244 	dead = now + hpet_idle_spin_timeout;
1245 	*lapic_expire = expire = now + lapic_count;
1246 	if (lapic_count == (hrtime_t)-1) {
1247 		/*
1248 		 * LAPIC timer is currently disabled.
1249 		 * Will not use the HPET as a LAPIC Timer proxy.
1250 		 */
1251 		*lapic_expire = (hrtime_t)HPET_INFINITY;
1252 		return (B_TRUE);
1253 	}
1254 
1255 	/*
1256 	 * Serialize hpet_proxy data structure manipulation.
1257 	 */
1258 	dead_count = 0;
1259 	while (!mutex_tryenter(&hpet_proxy_lock)) {
1260 		/*
1261 		 * spin
1262 		 */
1263 		apic_timer_restart(expire);
1264 		sti();
1265 		cli();
1266 
1267 		if (dead_count++ > hpet_spin_check) {
1268 			dead_count = 0;
1269 			hset_update = (((CPU->cpu_flags & CPU_OFFLINE) == 0) &&
1270 			    (ncpus > 1));
1271 			if (hset_update &&
1272 			    !bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
1273 				*lapic_expire = (hrtime_t)HPET_INFINITY;
1274 				return (B_FALSE);
1275 			}
1276 		}
1277 
1278 		lapic_count = apic_timer_stop_count();
1279 		now = gethrtime();
1280 		*lapic_expire = expire = now + lapic_count;
1281 		if (lapic_count == (hrtime_t)-1) {
1282 			/*
1283 			 * LAPIC timer is currently disabled.
1284 			 * Will not use the HPET as a LAPIC Timer proxy.
1285 			 */
1286 			*lapic_expire = (hrtime_t)HPET_INFINITY;
1287 			return (B_TRUE);
1288 		}
1289 		if (now > dead) {
1290 			apic_timer_restart(expire);
1291 			*lapic_expire = (hrtime_t)HPET_INFINITY;
1292 			return (B_FALSE);
1293 		}
1294 	}
1295 
1296 	if ((hpet_state.cpr == B_TRUE) ||
1297 	    (hpet_state.cpu_deep_idle == B_FALSE) ||
1298 	    (hpet_state.proxy_installed == B_FALSE) ||
1299 	    (hpet_state.uni_cstate == B_TRUE)) {
1300 		mutex_exit(&hpet_proxy_lock);
1301 		apic_timer_restart(expire);
1302 		*lapic_expire = (hrtime_t)HPET_INFINITY;
1303 		return (B_FALSE);
1304 	}
1305 
1306 	hpet_proxy_users[cpu_id] = expire;
1307 
1308 	/*
1309 	 * We are done if another cpu is scheduled on the HPET with an
1310 	 * expire time before us.  The next HPET interrupt has been programmed
1311 	 * to fire before our expire time.
1312 	 */
1313 	for (id = 0; id < ncpus; ++id) {
1314 		if ((hpet_proxy_users[id] <= expire) && (id != cpu_id)) {
1315 			mutex_exit(&hpet_proxy_lock);
1316 			return (B_TRUE);
1317 		}
1318 	}
1319 
1320 	/*
1321 	 * We are the next lAPIC to expire.
1322 	 * Program the HPET with our expire time.
1323 	 */
1324 	rslt = hpet_guaranteed_schedule(expire);
1325 	mutex_exit(&hpet_proxy_lock);
1326 
1327 	if (rslt == B_FALSE) {
1328 		apic_timer_restart(expire);
1329 		*lapic_expire = (hrtime_t)HPET_INFINITY;
1330 	}
1331 
1332 	return (rslt);
1333 }
1334 
1335 /*
1336  * Called by the idle thread when waking up from Deep C-state before enabling
1337  * interrupts.  With an array data structure it is faster to always remove
1338  * ourself from the array without checking if the HPET ISR already removed.
1339  *
1340  * We use a lazy algorithm for removing CPUs from the HPET's schedule.
1341  * We do not reprogram the HPET here because this CPU has real work to do.
1342  * On a idle system the CPU was probably woken up by the HPET's ISR.
1343  * On a heavily loaded system CPUs are not going into Deep C-state.
1344  * On a moderately loaded system another CPU will usually enter Deep C-state
1345  * and reprogram the HPET before the HPET fires with our wakeup.
1346  */
1347 static void
1348 hpet_use_lapic_timer(hrtime_t expire)
1349 {
1350 	extern void	apic_timer_restart(hrtime_t);
1351 	processorid_t	cpu_id = CPU->cpu_id;
1352 
1353 	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1354 	ASSERT(!interrupts_enabled());
1355 
1356 	hpet_proxy_users[cpu_id] = HPET_INFINITY;
1357 
1358 	/*
1359 	 * Do not enable a LAPIC Timer that was initially disabled.
1360 	 */
1361 	if (expire != HPET_INFINITY)
1362 		apic_timer_restart(expire);
1363 
1364 	sti();
1365 }
1366 
1367 /*
1368  * Initialize data structure to keep track of CPUs using HPET as a proxy for
1369  * their stalled local APIC timer.  For now this is just an array.
1370  */
1371 static void
1372 hpet_init_proxy_data(void)
1373 {
1374 	processorid_t	id;
1375 
1376 	/*
1377 	 * Use apic_nproc because we are in boot before max_ncpus has been
1378 	 * initialized.
1379 	 */
1380 	hpet_proxy_users = kmem_zalloc(apic_nproc * sizeof (*hpet_proxy_users),
1381 	    KM_SLEEP);
1382 
1383 	/*
1384 	 * Unused entries always contain HPET_INFINITY.
1385 	 */
1386 	for (id = 0; id < apic_nproc; ++id)
1387 		hpet_proxy_users[id] = HPET_INFINITY;
1388 }
1389