xref: /illumos-gate/usr/src/uts/i86pc/io/hpet_acpi.c (revision ec71f88e58593e3077f03588d3c38e6cbd4e8c1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/hpet_acpi.h>
26 #include <sys/hpet.h>
27 #include <sys/bitmap.h>
28 #include <sys/inttypes.h>
29 #include <sys/time.h>
30 #include <sys/sunddi.h>
31 #include <sys/ksynch.h>
32 #include <sys/apic.h>
33 #include <sys/callb.h>
34 #include <sys/clock.h>
35 #include <sys/archsystm.h>
36 #include <sys/cpupart.h>
37 
38 static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags);
39 static boolean_t hpet_install_proxy(void);
40 static boolean_t hpet_callback(int code);
41 static boolean_t hpet_cpr(int code);
42 static boolean_t hpet_resume(void);
43 static void hpet_cst_callback(uint32_t code);
44 static boolean_t hpet_deep_idle_config(int code);
45 static int hpet_validate_table(ACPI_TABLE_HPET *hpet_table);
46 static boolean_t hpet_checksum_table(unsigned char *table, unsigned int len);
47 static void *hpet_memory_map(ACPI_TABLE_HPET *hpet_table);
48 static int hpet_start_main_counter(hpet_info_t *hip);
49 static int hpet_stop_main_counter(hpet_info_t *hip);
50 static uint64_t hpet_read_main_counter_value(hpet_info_t *hip);
51 static uint64_t hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value);
52 static uint64_t hpet_read_gen_cap(hpet_info_t *hip);
53 static uint64_t hpet_read_gen_config(hpet_info_t *hip);
54 static uint64_t hpet_read_gen_intrpt_stat(hpet_info_t *hip);
55 static uint64_t hpet_read_timer_N_config(hpet_info_t *hip, uint_t n);
56 static hpet_TN_conf_cap_t hpet_convert_timer_N_config(uint64_t conf);
57 static void hpet_write_gen_config(hpet_info_t *hip, uint64_t l);
58 static void hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l);
59 static void hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l);
60 static void hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l);
61 static void hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n);
62 static void hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n);
63 static int hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip);
64 static int hpet_timer_available(uint32_t allocated_timers, uint32_t n);
65 static void hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n);
66 static void hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n,
67     uint32_t interrupt);
68 static uint_t hpet_isr(caddr_t, caddr_t);
69 static uint32_t hpet_install_interrupt_handler(avfunc func, int vector);
70 static void hpet_uninstall_interrupt_handler(void);
71 static void hpet_expire_all(void);
72 static boolean_t hpet_guaranteed_schedule(hrtime_t required_wakeup_time);
73 static boolean_t hpet_use_hpet_timer(hrtime_t *expire);
74 static void hpet_use_lapic_timer(hrtime_t expire);
75 static void hpet_init_proxy_data(void);
76 
77 /*
78  * hpet_state_lock is used to synchronize disabling/enabling deep c-states
79  * and to synchronize suspend/resume.
80  */
81 static kmutex_t		hpet_state_lock;
82 static struct hpet_state {
83 	boolean_t	proxy_installed;	/* CBE proxy interrupt setup */
84 	boolean_t	cpr;			/* currently in CPR */
85 	boolean_t	cpu_deep_idle;		/* user enable/disable */
86 	boolean_t	uni_cstate;		/* disable if only one cstate */
87 } hpet_state = { B_FALSE, B_FALSE, B_TRUE, B_TRUE};
88 
89 uint64_t hpet_spin_check = HPET_SPIN_CHECK;
90 uint64_t hpet_spin_timeout = HPET_SPIN_TIMEOUT;
91 uint64_t hpet_idle_spin_timeout = HPET_SPIN_TIMEOUT;
92 uint64_t hpet_isr_spin_timeout = HPET_SPIN_TIMEOUT;
93 
94 static kmutex_t		hpet_proxy_lock;	/* lock for lAPIC proxy data */
95 /*
96  * hpet_proxy_users is a per-cpu array.
97  */
98 static hpet_proxy_t	*hpet_proxy_users;	/* one per CPU */
99 
100 
101 ACPI_TABLE_HPET		*hpet_table;		/* ACPI HPET table */
102 hpet_info_t		hpet_info;		/* Human-readable information */
103 
104 /*
105  * Provide HPET access from unix.so.
106  * Set up pointers to access symbols in pcplusmp.
107  */
108 static void
109 hpet_establish_hooks(void)
110 {
111 	hpet.install_proxy = &hpet_install_proxy;
112 	hpet.callback = &hpet_callback;
113 	hpet.use_hpet_timer = &hpet_use_hpet_timer;
114 	hpet.use_lapic_timer = &hpet_use_lapic_timer;
115 }
116 
117 /*
118  * Get the ACPI "HPET" table.
119  * acpi_probe() calls this function from mp_startup before drivers are loaded.
120  * acpi_probe() verified the system is using ACPI before calling this.
121  *
122  * There may be more than one ACPI HPET table (Itanium only?).
123  * Intel's HPET spec defines each timer block to have up to 32 counters and
124  * be 1024 bytes long.  There can be more than one timer block of 32 counters.
125  * Each timer block would have an additional ACPI HPET table.
126  * Typical x86 systems today only have 1 HPET with 3 counters.
127  * On x86 we only consume HPET table "1" for now.
128  */
129 int
130 hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
131 {
132 	extern hrtime_t tsc_read(void);
133 	extern int	idle_cpu_no_deep_c;
134 	extern int	cpuid_deep_cstates_supported(void);
135 	void		*la;
136 	uint64_t	ret;
137 	uint_t		num_timers;
138 	uint_t		ti;
139 
140 	(void) memset(&hpet_info, 0, sizeof (hpet_info));
141 	hpet.supported = HPET_NO_SUPPORT;
142 
143 	if (idle_cpu_no_deep_c)
144 		return (DDI_FAILURE);
145 
146 	if (!cpuid_deep_cstates_supported())
147 		return (DDI_FAILURE);
148 
149 	hpet_establish_hooks();
150 
151 	/*
152 	 * Get HPET ACPI table 1.
153 	 */
154 	if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1,
155 	    (ACPI_TABLE_HEADER **)&hpet_table))) {
156 		cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table");
157 		return (DDI_FAILURE);
158 	}
159 
160 	if (hpet_validate_table(hpet_table) != AE_OK) {
161 		cmn_err(CE_NOTE, "!hpet_acpi: invalid HPET table");
162 		return (DDI_FAILURE);
163 	}
164 
165 	la = hpet_memory_map(hpet_table);
166 	if (la == NULL) {
167 		cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed");
168 		return (DDI_FAILURE);
169 	}
170 	hpet_info.logical_address = la;
171 
172 	ret = hpet_read_gen_cap(&hpet_info);
173 	hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret);
174 	hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret);
175 	hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret);
176 	hpet_info.gen_cap.count_size_cap = HPET_GCAP_CNT_SIZE_CAP(ret);
177 	/*
178 	 * Hardware contains the last timer's number.
179 	 * Add 1 to get the number of timers.
180 	 */
181 	hpet_info.gen_cap.num_tim_cap = HPET_GCAP_NUM_TIM_CAP(ret) + 1;
182 	hpet_info.gen_cap.rev_id = HPET_GCAP_REV_ID(ret);
183 
184 	if (hpet_info.gen_cap.counter_clk_period > HPET_MAX_CLK_PERIOD) {
185 		cmn_err(CE_NOTE, "!hpet_acpi: COUNTER_CLK_PERIOD 0x%lx > 0x%lx",
186 		    (long)hpet_info.gen_cap.counter_clk_period,
187 		    (long)HPET_MAX_CLK_PERIOD);
188 		return (DDI_FAILURE);
189 	}
190 
191 	num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap;
192 	if ((num_timers < 3) || (num_timers > 32)) {
193 		cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers "
194 		    "%lx", (long)num_timers);
195 		return (DDI_FAILURE);
196 	}
197 	hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc(
198 	    num_timers * sizeof (uint64_t), KM_SLEEP);
199 
200 	ret = hpet_read_gen_config(&hpet_info);
201 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
202 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
203 
204 	/*
205 	 * Solaris does not use the HPET Legacy Replacement Route capabilities.
206 	 * This feature has been off by default on test systems.
207 	 * The HPET spec does not specify if Legacy Replacement Route is
208 	 * on or off by default, so we explicitly set it off here.
209 	 * It should not matter which mode the HPET is in since we use
210 	 * the first available non-legacy replacement timer: timer 2.
211 	 */
212 	(void) hpet_set_leg_rt_cnf(&hpet_info, 0);
213 
214 	ret = hpet_read_gen_config(&hpet_info);
215 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
216 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
217 
218 	hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info);
219 	hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info);
220 
221 	for (ti = 0; ti < num_timers; ++ti) {
222 		ret = hpet_read_timer_N_config(&hpet_info, ti);
223 		/*
224 		 * Make sure no timers are enabled (think fast reboot or
225 		 * virtual hardware).
226 		 */
227 		if (ret & HPET_TIMER_N_INT_ENB_CNF_BIT) {
228 			hpet_disable_timer(&hpet_info, ti);
229 			ret &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
230 		}
231 
232 		hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret);
233 	}
234 
235 	/*
236 	 * Be aware the Main Counter may need to be initialized in the future
237 	 * if it is used for more than just Deep C-State support.
238 	 * The HPET's Main Counter does not need to be initialized to a specific
239 	 * value before it is started and used to wake up CPUs from Deep C-States.
240 	 */
241 	if (hpet_start_main_counter(&hpet_info) != AE_OK) {
242 		cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed");
243 		return (DDI_FAILURE);
244 	}
245 
246 	hpet_info.period = hpet_info.gen_cap.counter_clk_period;
247 	/*
248 	 * Read main counter twice to record HPET latency for debugging.
249 	 */
250 	hpet_info.tsc[0] = tsc_read();
251 	hpet_info.hpet_main_counter_reads[0] =
252 	    hpet_read_main_counter_value(&hpet_info);
253 	hpet_info.tsc[1] = tsc_read();
254 	hpet_info.hpet_main_counter_reads[1] =
255 	    hpet_read_main_counter_value(&hpet_info);
256 	hpet_info.tsc[2] = tsc_read();
257 
258 	ret = hpet_read_gen_config(&hpet_info);
259 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
260 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
261 
262 	/*
263 	 * HPET main counter reads are supported now.
264 	 */
265 	hpet.supported = HPET_TIMER_SUPPORT;
266 
267 	return (hpet_init_proxy(hpet_vect, hpet_flags));
268 }
269 
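/*
 * Tear down in reverse order of setup: stop the main counter if timer reads
 * were supported, and mask the proxy timer's interrupt if full (proxy)
 * support was reached.
 */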
270 void
271 hpet_acpi_fini(void)
272 {
273 	if (hpet.supported == HPET_NO_SUPPORT)
274 		return;
275 	if (hpet.supported >= HPET_TIMER_SUPPORT)
276 		(void) hpet_stop_main_counter(&hpet_info);
277 	if (hpet.supported > HPET_TIMER_SUPPORT)
278 		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
279 }
280 
281 /*
282  * Do initial setup to use an HPET timer as a proxy for Deep C-state stalled
283  * LAPIC Timers.  Get a free HPET timer that supports I/O APIC routed interrupts.
284  * Set up data to handle the timer's ISR, and add the timer's interrupt.
285  *
286  * The DDI cannot be used to allocate the HPET timer's interrupt.
287  * ioapic_init_intr() in mp_platform_common() later sets up the I/O APIC
288  * to handle the HPET timer's interrupt.
289  *
290  * Note: FSB (MSI) interrupts are not currently supported by Intel HPETs as of
291  * ICH9.  The HPET spec allows for MSI.  In the future MSI may be preferred.
292  */
293 static int
294 hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
295 {
296 	if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) {
297 		cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed.");
298 		return (DDI_FAILURE);
299 	}
300 
301 	hpet_init_proxy_data();
302 
303 	if (hpet_install_interrupt_handler(&hpet_isr,
304 	    hpet_info.cstate_timer.intr) != AE_OK) {
305 		cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed.");
306 		return (DDI_FAILURE);
307 	}
308 	*hpet_vect = hpet_info.cstate_timer.intr;
309 	hpet_flags->intr_el = INTR_EL_LEVEL;
310 	hpet_flags->intr_po = INTR_PO_ACTIVE_HIGH;
311 	hpet_flags->bustype = BUS_PCI;		/*  we *do* conform to PCI */
312 
313 	/*
314 	 * Avoid a possibly stuck interrupt by programming the HPET's timer here
315 	 * before the I/O APIC is programmed to handle this interrupt.
316 	 */
317 	hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
318 	    hpet_info.cstate_timer.intr);
319 
320 	/*
321 	 * All HPET functionality is supported.
322 	 */
323 	hpet.supported = HPET_FULL_SUPPORT;
324 	return (DDI_SUCCESS);
325 }
326 
327 /*
328  * Called by kernel if it can support Deep C-States.
329  */
330 static boolean_t
331 hpet_install_proxy(void)
332 {
333 	if (hpet_state.proxy_installed == B_TRUE)
334 		return (B_TRUE);
335 
336 	if (hpet.supported != HPET_FULL_SUPPORT)
337 		return (B_FALSE);
338 
339 	hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
340 	hpet_state.proxy_installed = B_TRUE;
341 
342 	return (B_TRUE);
343 }
344 
345 /*
346  * Remove the interrupt that was added with add_avintr() in
347  * hpet_install_interrupt_handler().
348  */
349 static void
350 hpet_uninstall_interrupt_handler(void)
351 {
352 	rem_avintr(NULL, CBE_HIGH_PIL, &hpet_isr, hpet_info.cstate_timer.intr);
353 }
354 
355 static int
356 hpet_validate_table(ACPI_TABLE_HPET *hpet_table)
357 {
358 	ACPI_TABLE_HEADER	*table_header = (ACPI_TABLE_HEADER *)hpet_table;
359 
360 	if (table_header->Length != sizeof (ACPI_TABLE_HPET)) {
361 		cmn_err(CE_WARN, "!hpet_validate_table: Length %lx != sizeof ("
362 		    "ACPI_TABLE_HPET) %lx.",
363 		    (unsigned long)((ACPI_TABLE_HEADER *)hpet_table)->Length,
364 		    (unsigned long)sizeof (ACPI_TABLE_HPET));
365 		return (AE_ERROR);
366 	}
367 
368 	if (!ACPI_COMPARE_NAME(table_header->Signature, ACPI_SIG_HPET)) {
369 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET table "
370 		    "signature");
371 		return (AE_ERROR);
372 	}
373 
374 	if (!hpet_checksum_table((unsigned char *)hpet_table,
375 	    (unsigned int)table_header->Length)) {
376 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET checksum");
377 		return (AE_ERROR);
378 	}
379 
380 	/*
381 	 * Sequence should be table number - 1.  We are using table 1.
382 	 */
383 	if (hpet_table->Sequence != HPET_TABLE_1 - 1) {
384 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid Sequence %lx",
385 		    (long)hpet_table->Sequence);
386 		return (AE_ERROR);
387 	}
388 
389 	return (AE_OK);
390 }
391 
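/*
 * ACPI tables checksum to zero: the byte-wise sum of the entire table,
 * including the checksum byte itself, must be 0 (mod 256) for a valid table.
 */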
392 static boolean_t
393 hpet_checksum_table(unsigned char *table, unsigned int length)
394 {
395 	unsigned char	checksum = 0;
396 	int		i;
397 
398 	for (i = 0; i < length; ++i, ++table)
399 		checksum += *table;
400 
401 	return (checksum == 0);
402 }
403 
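/*
 * Map the HPET's register block (HPET_SIZE bytes starting at the physical
 * address reported in the ACPI HPET table) into kernel virtual address space.
 */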
404 static void *
405 hpet_memory_map(ACPI_TABLE_HPET *hpet_table)
406 {
407 	return (AcpiOsMapMemory(hpet_table->Address.Address, HPET_SIZE));
408 }
409 
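/*
 * Start the free-running main counter by setting HPET_GCFR_ENABLE_CNF in the
 * general configuration register.  Read the register back to confirm the bit
 * stuck; return AE_OK only if the counter is now enabled.
 */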
410 static int
411 hpet_start_main_counter(hpet_info_t *hip)
412 {
413 	uint64_t	*gcr_ptr;
414 	uint64_t	gcr;
415 
416 	gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
417 	gcr = *gcr_ptr;
418 
419 	gcr |= HPET_GCFR_ENABLE_CNF;
420 	*gcr_ptr = gcr;
421 	gcr = *gcr_ptr;
422 
423 	return (gcr & HPET_GCFR_ENABLE_CNF ? AE_OK : ~AE_OK);
424 }
425 
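/*
 * Stop the main counter by clearing HPET_GCFR_ENABLE_CNF and verifying the
 * bit reads back as zero.
 */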
426 static int
427 hpet_stop_main_counter(hpet_info_t *hip)
428 {
429 	uint64_t	*gcr_ptr;
430 	uint64_t	gcr;
431 
432 	gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
433 	gcr = *gcr_ptr;
434 
435 	gcr &= ~HPET_GCFR_ENABLE_CNF;
436 	*gcr_ptr = gcr;
437 	gcr = *gcr_ptr;
438 
439 	return (gcr & HPET_GCFR_ENABLE_CNF ? ~AE_OK : AE_OK);
440 }
441 
442 /*
443  * Set the Legacy Replacement Route bit.
444  * This should be called before setting up timers.
445  * The HPET specification is silent regarding setting this after timers are
446  * programmed.
447  */
448 static uint64_t
449 hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value)
450 {
451 	uint64_t gen_conf = hpet_read_gen_config(hip);
452 
453 	switch (new_value) {
454 	case 0:
455 		gen_conf &= ~HPET_GCFR_LEG_RT_CNF;
456 		break;
457 
458 	case HPET_GCFR_LEG_RT_CNF:
459 		gen_conf |= HPET_GCFR_LEG_RT_CNF;
460 		break;
461 
462 	default:
463 		ASSERT(new_value == 0 || new_value == HPET_GCFR_LEG_RT_CNF);
464 		break;
465 	}
466 	hpet_write_gen_config(hip, gen_conf);
467 	return (gen_conf);
468 }
469 
470 static uint64_t
471 hpet_read_gen_cap(hpet_info_t *hip)
472 {
473 	return (*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address));
474 }
475 
476 static uint64_t
477 hpet_read_gen_config(hpet_info_t *hip)
478 {
479 	return (*(uint64_t *)
480 	    HPET_GEN_CONFIG_ADDRESS(hip->logical_address));
481 }
482 
483 static uint64_t
484 hpet_read_gen_intrpt_stat(hpet_info_t *hip)
485 {
486 	hip->gen_intrpt_stat = *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(
487 	    hip->logical_address);
488 	return (hip->gen_intrpt_stat);
489 }
490 
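/*
 * Read timer N's configuration/capability register, caching the decoded
 * fields in the supplied hpet_info_t before returning the raw 64-bit value.
 */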
491 static uint64_t
492 hpet_read_timer_N_config(hpet_info_t *hip, uint_t n)
493 {
494 	uint64_t conf = *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
495 	    hip->logical_address, n);
496 	hip->timer_n_config[n] = hpet_convert_timer_N_config(conf);
497 	return (conf);
498 }
499 
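/*
 * Unpack a raw timer configuration/capability register value into the
 * bitfield structure used throughout this file.
 */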
500 static hpet_TN_conf_cap_t
501 hpet_convert_timer_N_config(uint64_t conf)
502 {
503 	hpet_TN_conf_cap_t cc = { 0 };
504 
505 	cc.int_route_cap = HPET_TIMER_N_INT_ROUTE_CAP(conf);
506 	cc.fsb_int_del_cap = HPET_TIMER_N_FSB_INT_DEL_CAP(conf);
507 	cc.fsb_int_en_cnf = HPET_TIMER_N_FSB_EN_CNF(conf);
508 	cc.int_route_cnf = HPET_TIMER_N_INT_ROUTE_CNF(conf);
509 	cc.mode32_cnf = HPET_TIMER_N_MODE32_CNF(conf);
510 	cc.val_set_cnf = HPET_TIMER_N_VAL_SET_CNF(conf);
511 	cc.size_cap = HPET_TIMER_N_SIZE_CAP(conf);
512 	cc.per_int_cap = HPET_TIMER_N_PER_INT_CAP(conf);
513 	cc.type_cnf = HPET_TIMER_N_TYPE_CNF(conf);
514 	cc.int_enb_cnf = HPET_TIMER_N_INT_ENB_CNF(conf);
515 	cc.int_type_cnf = HPET_TIMER_N_INT_TYPE_CNF(conf);
516 
517 	return (cc);
518 }
519 
520 static uint64_t
521 hpet_read_main_counter_value(hpet_info_t *hip)
522 {
523 	uint64_t	value;
524 	uint32_t	*counter;
525 	uint32_t	high1, high2, low;
526 
527 	counter = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address);
528 
529 	/*
530 	 * 32-bit main counters
531 	 */
532 	if (hip->gen_cap.count_size_cap == 0) {
533 		value = (uint64_t)*counter;
534 		hip->main_counter_value = value;
535 		return (value);
536 	}
537 
538 	/*
539 	 * HPET spec claims a 64-bit read can be split into two 32-bit reads
540 	 * by the hardware connection to the HPET.
541 	 */
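	/*
	 * Read high, then low, then high again; if the high half changed, a
	 * carry from the low half raced with the read, so retry to avoid
	 * returning a torn 64-bit value.
	 */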
542 	high2 = counter[1];
543 	do {
544 		high1 = high2;
545 		low = counter[0];
546 		high2 = counter[1];
547 	} while (high2 != high1);
548 
549 	value = ((uint64_t)high1 << 32) | low;
550 	hip->main_counter_value = value;
551 	return (value);
552 }
553 
554 static void
555 hpet_write_gen_config(hpet_info_t *hip, uint64_t l)
556 {
557 	*(uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address) = l;
558 }
559 
560 static void
561 hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l)
562 {
563 	*(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(hip->logical_address) = l;
564 }
565 
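/*
 * Write timer N's configuration register.  A 64-bit timer takes the full
 * value; a 32-bit timer only accepts the low word, so the upper half is
 * dropped.
 */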
566 static void
567 hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l)
568 {
569 	if (hip->timer_n_config[n].size_cap == 1)
570 		*(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
571 		    hip->logical_address, n) = l;
572 	else
573 		*(uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
574 		    hip->logical_address, n) = (uint32_t)(0xFFFFFFFF & l);
575 }
576 
577 static void
578 hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l)
579 {
580 	*(uint64_t *)HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n) = l;
581 }
582 
583 static void
584 hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n)
585 {
586 	uint64_t l;
587 
588 	l = hpet_read_timer_N_config(hip, timer_n);
589 	l &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
590 	hpet_write_timer_N_config(hip, timer_n, l);
591 }
592 
593 static void
594 hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n)
595 {
596 	uint64_t l;
597 
598 	l = hpet_read_timer_N_config(hip, timer_n);
599 	l |= HPET_TIMER_N_INT_ENB_CNF_BIT;
600 	hpet_write_timer_N_config(hip, timer_n, l);
601 }
602 
603 /*
604  * Add the interrupt handler for I/O APIC interrupt number (interrupt line).
605  *
606  * The I/O APIC line (vector) is programmed in ioapic_init_intr() called
607  * from apic_picinit() psm_ops apic_ops entry point after we return from
608  * apic_init() psm_ops entry point.
609  */
610 static uint32_t
611 hpet_install_interrupt_handler(avfunc func, int vector)
612 {
613 	uint32_t retval;
614 
615 	retval = add_avintr(NULL, CBE_HIGH_PIL, func, "HPET Timer",
616 	    vector, NULL, NULL, NULL, NULL);
617 	if (retval == 0) {
618 		cmn_err(CE_WARN, "!hpet_acpi: add_avintr() failed");
619 		return (AE_BAD_PARAMETER);
620 	}
621 	return (AE_OK);
622 }
623 
624 /*
625  * The HPET timers specify which I/O APIC interrupts they can be routed to.
626  * Find the first available non-legacy-replacement timer and its I/O APIC irq.
627  * Supported I/O APIC IRQs are specified in the int_route_cap bitmap in each
628  * timer's timer_n_config register.
629  */
630 static int
631 hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip)
632 {
633 	int	timer;
634 	int	intr;
635 
636 	for (timer = HPET_FIRST_NON_LEGACY_TIMER;
637 	    timer < hip->gen_cap.num_tim_cap; ++timer) {
638 
639 		if (!hpet_timer_available(hip->allocated_timers, timer))
640 			continue;
641 
642 		intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1;
643 		if (intr >= 0) {
644 			hpet_timer_alloc(&hip->allocated_timers, timer);
645 			hip->cstate_timer.timer = timer;
646 			hip->cstate_timer.intr = intr;
647 			return (timer);
648 		}
649 	}
650 
651 	return (-1);
652 }
653 
654 /*
655  * Mark this timer as used.
656  */
657 static void
658 hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n)
659 {
660 	*allocated_timers |= 1 << n;
661 }
662 
663 /*
664  * Check if this timer is available.
665  * No mutual exclusion because only one thread uses this.
666  */
667 static int
668 hpet_timer_available(uint32_t allocated_timers, uint32_t n)
669 {
670 	return ((allocated_timers & (1 << n)) == 0);
671 }
672 
673 /*
674  * Set up timer N to route its interrupt to the I/O APIC.
675  */
676 static void
677 hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
678 {
679 	uint64_t conf;
680 
681 	conf = hpet_read_timer_N_config(hip, timer_n);
682 
683 	/*
684 	 * Caller is required to verify this interrupt route is supported.
685 	 */
686 	ASSERT(HPET_TIMER_N_INT_ROUTE_CAP(conf) & (1 << interrupt));
687 
688 	conf &= ~HPET_TIMER_N_FSB_EN_CNF_BIT;	/* use IOAPIC */
689 	conf |= HPET_TIMER_N_INT_ROUTE_SHIFT(interrupt);
690 	conf &= ~HPET_TIMER_N_TYPE_CNF_BIT;	/* non periodic */
691 	conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;	/* disabled */
692 	conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT;	/* Level Triggered */
693 
694 	hpet_write_timer_N_config(hip, timer_n, conf);
695 }
696 
697 /*
698  * The HPET's Main Counter is not stopped before programming an HPET timer.
699  * This will allow the HPET to be used as a time source.
700  * The programmed timer interrupt may occur before this function returns.
701  * Callers must block interrupts before calling this function if they must
702  * guarantee the interrupt is handled after this function returns.
703  *
704  * Return AE_OK (0) if the main counter is less than the comparator after
705  * programming: the interrupt was set up, but it may fire before this returns.
706  * Return AE_TIME (!0) if the main counter has already passed the comparator:
707  * the timer will not fire again, and we do not know whether it already fired.
708  *
709  * delta is in HPET ticks.
710  *
711  * Writing a 64-bit value to a 32-bit register will "wrap around".
712  * A 32-bit HPET timer will wrap around in a little over 5 minutes.
713  */
714 int
715 hpet_timer_program(hpet_info_t *hip, uint32_t timer, uint64_t delta)
716 {
717 	uint64_t time, program;
718 
719 	program = hpet_read_main_counter_value(hip);
720 	program += delta;
721 	hpet_write_timer_N_comp(hip, timer, program);
722 
723 	time = hpet_read_main_counter_value(hip);
724 	if (time < program)
725 		return (AE_OK);
726 
727 	return (AE_TIME);
728 }
729 
730 /*
731  * CPR and power policy-change callback entry point.
732  */
733 boolean_t
734 hpet_callback(int code)
735 {
736 	switch (code) {
737 	case PM_DEFAULT_CPU_DEEP_IDLE:
738 		/*FALLTHROUGH*/
739 	case PM_ENABLE_CPU_DEEP_IDLE:
740 		/*FALLTHROUGH*/
741 	case PM_DISABLE_CPU_DEEP_IDLE:
742 		return (hpet_deep_idle_config(code));
743 
744 	case CB_CODE_CPR_RESUME:
745 		/*FALLTHROUGH*/
746 	case CB_CODE_CPR_CHKPT:
747 		return (hpet_cpr(code));
748 
749 	case CST_EVENT_MULTIPLE_CSTATES:
750 		hpet_cst_callback(CST_EVENT_MULTIPLE_CSTATES);
751 		return (B_TRUE);
752 
753 	case CST_EVENT_ONE_CSTATE:
754 		hpet_cst_callback(CST_EVENT_ONE_CSTATE);
755 		return (B_TRUE);
756 
757 	default:
758 		cmn_err(CE_NOTE, "!hpet_callback: invalid code %d\n", code);
759 		return (B_FALSE);
760 	}
761 }
762 
763 /*
764  * According to the HPET spec 1.0a: the Operating System must save and restore
765  * HPET event timer hardware context through ACPI sleep state transitions.
766  * Timer registers (including the main counter) may not be preserved through
767  * ACPI S3, S4, or S5 sleep states.  This code does not support S1 or S2.
768  *
769  * Current HPET state is already in hpet.supported and
770  * hpet_state.proxy_installed.  hpet_info contains the proxy interrupt HPET
771  * Timer state.
772  *
773  * Future projects beware: the HPET Main Counter is undefined after ACPI S3 or
774  * S4, and it is not saved/restored here.  Future projects cannot expect the
775  * Main Counter to be monotonically (or accurately) increasing across CPR.
776  *
777  * Note: the CPR Checkpoint path later calls pause_cpus() which ensures all
778  * CPUs are awake and in a spin loop before the system suspends.  The HPET is
779  * not needed for Deep C-state wakeup when CPUs are in cpu_pause().
780  * It is safe to leave the HPET running as the system suspends; we just
781  * disable the timer from generating interrupts here.
782  */
783 static boolean_t
784 hpet_cpr(int code)
785 {
786 	ulong_t		intr, dead_count = 0;
787 	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
788 	boolean_t	ret = B_TRUE;
789 
790 	mutex_enter(&hpet_state_lock);
791 	switch (code) {
792 	case CB_CODE_CPR_CHKPT:
793 		if (hpet_state.proxy_installed == B_FALSE)
794 			break;
795 
796 		hpet_state.cpr = B_TRUE;
797 
798 		intr = intr_clear();
799 		while (!mutex_tryenter(&hpet_proxy_lock)) {
800 			/*
801 			 * spin
802 			 */
803 			intr_restore(intr);
804 			if (dead_count++ > hpet_spin_check) {
805 				dead_count = 0;
806 				if (gethrtime() > dead) {
807 					hpet_state.cpr = B_FALSE;
808 					mutex_exit(&hpet_state_lock);
809 					cmn_err(CE_NOTE, "!hpet_cpr: deadman");
810 					return (B_FALSE);
811 				}
812 			}
813 			intr = intr_clear();
814 		}
815 		hpet_expire_all();
816 		mutex_exit(&hpet_proxy_lock);
817 		intr_restore(intr);
818 
819 		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
820 		break;
821 
822 	case CB_CODE_CPR_RESUME:
823 		if (hpet_resume() == B_TRUE)
824 			hpet_state.cpr = B_FALSE;
825 		else
826 			cmn_err(CE_NOTE, "!hpet_resume failed.");
827 		break;
828 
829 	default:
830 		cmn_err(CE_NOTE, "!hpet_cpr: invalid code %d\n", code);
831 		ret = B_FALSE;
832 		break;
833 	}
834 	mutex_exit(&hpet_state_lock);
835 	return (ret);
836 }
837 
838 /*
839  * Assume the HPET stopped in Suspend state and timer state was lost.
840  */
841 static boolean_t
842 hpet_resume(void)
843 {
844 	if (hpet.supported < HPET_TIMER_SUPPORT)
845 		return (B_TRUE);
846 
847 	/*
848 	 * The HPET spec does not specify if Legacy Replacement Route is
849 	 * on or off by default, so we set it off here.
850 	 */
851 	(void) hpet_set_leg_rt_cnf(&hpet_info, 0);
852 
853 	if (hpet_start_main_counter(&hpet_info) != AE_OK) {
854 		cmn_err(CE_NOTE, "!hpet_resume: start main counter failed");
855 		hpet.supported = HPET_NO_SUPPORT;
856 		if (hpet_state.proxy_installed == B_TRUE) {
857 			hpet_state.proxy_installed = B_FALSE;
858 			hpet_uninstall_interrupt_handler();
859 		}
860 		return (B_FALSE);
861 	}
862 
863 	if (hpet_state.proxy_installed == B_FALSE)
864 		return (B_TRUE);
865 
866 	hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
867 	    hpet_info.cstate_timer.intr);
868 	if (hpet_state.cpu_deep_idle == B_TRUE)
869 		hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
870 
871 	return (B_TRUE);
872 }
873 
874 /*
875  * Callback to enable/disable Deep C-States based on power.conf setting.
876  */
877 static boolean_t
878 hpet_deep_idle_config(int code)
879 {
880 	ulong_t		intr, dead_count = 0;
881 	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
882 	boolean_t	ret = B_TRUE;
883 
884 	mutex_enter(&hpet_state_lock);
885 	switch (code) {
886 	case PM_DEFAULT_CPU_DEEP_IDLE:
887 		/*FALLTHROUGH*/
888 	case PM_ENABLE_CPU_DEEP_IDLE:
889 
890 		if (hpet_state.cpu_deep_idle == B_TRUE)
891 			break;
892 
893 		if (hpet_state.proxy_installed == B_FALSE) {
894 			ret = B_FALSE;  /* Deep C-States not supported */
895 			break;
896 		}
897 
898 		hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
899 		hpet_state.cpu_deep_idle = B_TRUE;
900 		break;
901 
902 	case PM_DISABLE_CPU_DEEP_IDLE:
903 
904 		if ((hpet_state.cpu_deep_idle == B_FALSE) ||
905 		    (hpet_state.proxy_installed == B_FALSE))
906 			break;
907 
908 		/*
909 		 * The order of these operations is important to avoid
910 		 * lost wakeups: Set a flag to refuse all future LAPIC Timer
911 		 * proxy requests, then wake up all CPUs from deep C-state,
912 		 * and finally disable the HPET interrupt-generating timer.
913 		 */
914 		hpet_state.cpu_deep_idle = B_FALSE;
915 
916 		intr = intr_clear();
917 		while (!mutex_tryenter(&hpet_proxy_lock)) {
918 			/*
919 			 * spin
920 			 */
921 			intr_restore(intr);
922 			if (dead_count++ > hpet_spin_check) {
923 				dead_count = 0;
924 				if (gethrtime() > dead) {
925 					hpet_state.cpu_deep_idle = B_TRUE;
926 					mutex_exit(&hpet_state_lock);
927 					cmn_err(CE_NOTE,
928 					    "!hpet_deep_idle_config: deadman");
929 					return (B_FALSE);
930 				}
931 			}
932 			intr = intr_clear();
933 		}
934 		hpet_expire_all();
935 		mutex_exit(&hpet_proxy_lock);
936 		intr_restore(intr);
937 
938 		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
939 		break;
940 
941 	default:
942 		cmn_err(CE_NOTE, "!hpet_deep_idle_config: invalid code %d\n",
943 		    code);
944 		ret = B_FALSE;
945 		break;
946 	}
947 	mutex_exit(&hpet_state_lock);
948 
949 	return (ret);
950 }
951 
952 /*
953  * Callback for _CST c-state change notifications.
954  */
955 static void
956 hpet_cst_callback(uint32_t code)
957 {
958 	ulong_t		intr, dead_count = 0;
959 	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
960 
961 	switch (code) {
962 	case CST_EVENT_ONE_CSTATE:
963 		hpet_state.uni_cstate = B_TRUE;
964 		intr = intr_clear();
965 		while (!mutex_tryenter(&hpet_proxy_lock)) {
966 			/*
967 			 * spin
968 			 */
969 			intr_restore(intr);
970 			if (dead_count++ > hpet_spin_check) {
971 				dead_count = 0;
972 				if (gethrtime() > dead) {
973 					hpet_expire_all();
974 					cmn_err(CE_NOTE,
975 					    "!hpet_cst_callback: deadman");
976 					return;
977 				}
978 			}
979 			intr = intr_clear();
980 		}
981 		hpet_expire_all();
982 		mutex_exit(&hpet_proxy_lock);
983 		intr_restore(intr);
984 		break;
985 
986 	case CST_EVENT_MULTIPLE_CSTATES:
987 		hpet_state.uni_cstate = B_FALSE;
988 		break;
989 
990 	default:
991 		cmn_err(CE_NOTE, "!hpet_cst_callback: invalid code %d\n", code);
992 		break;
993 	}
994 }
995 
996 /*
997  * Interrupt Service Routine for HPET I/O-APIC-generated interrupts.
998  * Used to wake up CPUs from Deep C-state when their Local APIC Timer stops.
999  * This ISR runs on one CPU which pokes other CPUs out of Deep C-state as
1000  * needed.
1001  */
1002 static uint_t
1003 hpet_isr(caddr_t arg __unused, caddr_t arg1 __unused)
1004 {
1005 	uint64_t	timer_status;
1006 	uint64_t	timer_mask;
1007 	ulong_t		intr, dead_count = 0;
1008 	hrtime_t	dead = gethrtime() + hpet_isr_spin_timeout;
1009 
1010 	timer_mask = HPET_INTR_STATUS_MASK(hpet_info.cstate_timer.timer);
1011 
1012 	/*
1013 	 * We are using a level-triggered interrupt.
1014 	 * HPET sets timer's General Interrupt Status Register bit N.
1015 	 * ISR checks this bit to see if it needs servicing.
1016 	 * ISR then clears this bit by writing 1 to that bit.
1017 	 */
1018 	timer_status = hpet_read_gen_intrpt_stat(&hpet_info);
1019 	if (!(timer_status & timer_mask))
1020 		return (DDI_INTR_UNCLAIMED);
1021 	hpet_write_gen_intrpt_stat(&hpet_info, timer_mask);
1022 
1023 	/*
1024 	 * Do not touch ISR data structures before checking the HPET's General
1025 	 * Interrupt Status register.  The General Interrupt Status register
1026 	 * will not be set by hardware until after timer interrupt generation
1027 	 * is enabled by software.  Software allocates necessary data
1028 	 * structures before enabling timer interrupts.  ASSERT the software
1029 	 * data structures required to handle this interrupt are initialized.
1030 	 */
1031 	ASSERT(hpet_proxy_users != NULL);
1032 
1033 	/*
1034 	 * CPUs in deep c-states do not enable interrupts until after
1035 	 * performing idle cleanup which includes descheduling themselves from
1036 	 * the HPET.  The CPU running this ISR will NEVER find itself in the
1037 	 * proxy list.  A lost wakeup may occur if this is false.
1038 	 */
1039 	ASSERT(hpet_proxy_users[CPU->cpu_id] == HPET_INFINITY);
1040 
1041 	/*
1042 	 * Higher level interrupts may deadlock with CPUs going idle if this
1043 	 * ISR is preempted while holding hpet_proxy_lock.
1044 	 */
1045 	intr = intr_clear();
1046 	while (!mutex_tryenter(&hpet_proxy_lock)) {
1047 		/*
1048 		 * spin
1049 		 */
1050 		intr_restore(intr);
1051 		if (dead_count++ > hpet_spin_check) {
1052 			dead_count = 0;
1053 			if (gethrtime() > dead) {
1054 				hpet_expire_all();
1055 				return (DDI_INTR_CLAIMED);
1056 			}
1057 		}
1058 		intr = intr_clear();
1059 	}
1060 	(void) hpet_guaranteed_schedule(HPET_INFINITY);
1061 	mutex_exit(&hpet_proxy_lock);
1062 	intr_restore(intr);
1063 
1064 	return (DDI_INTR_CLAIMED);
1065 }
1066 
1067 /*
1068  * Used when disabling the HPET Timer interrupt.  CPUs in Deep C-state must be
1069  * woken up because they can no longer rely on the HPET's Timer to wake them.
1070  * We do not need to wait for CPUs to wake up.
1071  */
1072 static void
1073 hpet_expire_all(void)
1074 {
1075 	processorid_t	id;
1076 
1077 	for (id = 0; id < max_ncpus; ++id) {
1078 		if (hpet_proxy_users[id] != HPET_INFINITY) {
1079 			hpet_proxy_users[id] = HPET_INFINITY;
1080 			if (id != CPU->cpu_id)
1081 				poke_cpu(id);
1082 		}
1083 	}
1084 }
1085 
1086 /*
1087  * To avoid missed wakeups this function must guarantee either the HPET timer
1088  * was successfully programmed to the next expire time or there are no waiting
1089  * CPUs.
1090  *
1091  * Callers cannot enter C2 or deeper if the HPET could not be programmed to
1092  * generate its next interrupt to happen at required_wakeup_time or sooner.
1093  * Returns B_TRUE if the HPET was programmed to interrupt by
1094  * required_wakeup_time, B_FALSE if not.
1095  */
1096 static boolean_t
1097 hpet_guaranteed_schedule(hrtime_t required_wakeup_time)
1098 {
1099 	hrtime_t	now, next_proxy_time;
1100 	processorid_t	id, next_proxy_id;
1101 	int		proxy_timer = hpet_info.cstate_timer.timer;
1102 	boolean_t	done = B_FALSE;
1103 
1104 	ASSERT(mutex_owned(&hpet_proxy_lock));
1105 
1106 	/*
1107 	 * Loop until we successfully program the HPET,
1108 	 * or no CPUs are scheduled to use the HPET as a proxy.
1109 	 */
1110 	do {
1111 		/*
1112 		 * Wake all CPUs that expired before now.
1113 		 * Find the next CPU to wake up and next HPET program time.
1114 		 */
1115 		now = gethrtime();
1116 		next_proxy_time = HPET_INFINITY;
1117 		next_proxy_id = CPU->cpu_id;
1118 		for (id = 0; id < max_ncpus; ++id) {
1119 			if (hpet_proxy_users[id] < now) {
1120 				hpet_proxy_users[id] = HPET_INFINITY;
1121 				if (id != CPU->cpu_id)
1122 					poke_cpu(id);
1123 			} else if (hpet_proxy_users[id] < next_proxy_time) {
1124 				next_proxy_time = hpet_proxy_users[id];
1125 				next_proxy_id = id;
1126 			}
1127 		}
1128 
1129 		if (next_proxy_time == HPET_INFINITY) {
1130 			done = B_TRUE;
1131 			/*
1132 			 * There are currently no CPUs using the HPET's Timer
1133 			 * as a proxy for their LAPIC Timer.  The HPET's Timer
1134 			 * does not need to be programmed.
1135 			 *
1136 			 * Letting the HPET timer wrap around to the current
1137 			 * time is the longest possible timeout.
1138 			 * A 64-bit timer will not wrap around for thousands
1139 			 * of years.  A 32-bit timer wraps around in a little
1139 			 * over 5 minutes.
1140 			 *
1141 			 * Disabling the HPET's timer interrupt requires a
1142 			 * (relatively expensive) write to the HPET.
1143 			 * Instead we do nothing.
1144 			 *
1145 			 * We are gambling some CPU will attempt to enter a
1146 			 * deep c-state before the timer wraps around.
1147 			 * We assume an occasional spurious interrupt (at most
1148 			 * one per counter wrap) has less performance impact
1149 			 * than writing to the HPET's timer disable bit every
1150 			 * time all CPUs wake up from deep c-state.
1151 			 */
1152 
1153 		} else {
1154 			/*
1155 			 * Idle CPUs disable interrupts before programming the
1156 			 * HPET to prevent a lost wakeup if the HPET
1157 			 * interrupts the idle cpu before it can enter a
1158 			 * Deep C-State.
1159 			 */
1160 			if (hpet_timer_program(&hpet_info, proxy_timer,
1161 			    HRTIME_TO_HPET_TICKS(next_proxy_time - gethrtime()))
1162 			    != AE_OK) {
1163 				/*
1164 				 * We could not program the HPET to wakeup the
1165 				 * next CPU.  We must wake the CPU ourself to
1166 				 * avoid a lost wakeup.
1167 				 */
1168 				hpet_proxy_users[next_proxy_id] = HPET_INFINITY;
1169 				if (next_proxy_id != CPU->cpu_id)
1170 					poke_cpu(next_proxy_id);
1171 			} else {
1172 				done = B_TRUE;
1173 			}
1174 		}
1175 
1176 	} while (!done);
1177 
1178 	return (next_proxy_time <= required_wakeup_time);
1179 }
1180 
1181 /*
1182  * Use an HPET timer to act as this CPU's proxy local APIC timer.
1183  * Used in deep c-states C2 and above while the CPU's local APIC timer stalls.
1184  * Called by the idle thread with interrupts disabled.
1185  * Always returns with interrupts disabled.
1186  *
1187  * There are 3 possible outcomes from this function:
1188  * 1. The Local APIC Timer was already disabled before this function was called.
1189  *	LAPIC TIMER	: disabled
1190  *	HPET		: not scheduled to wake this CPU
1191  *	*lapic_expire	: (hrtime_t)HPET_INFINITY
1192  *	Returns		: B_TRUE
1193  * 2. Successfully programmed the HPET to act as a LAPIC Timer proxy.
1194  *	LAPIC TIMER	: disabled
1195  *	HPET		: scheduled to wake this CPU
1196  *	*lapic_expire	: hrtime_t when LAPIC timer would have expired
1197  *	Returns		: B_TRUE
1198  * 3. Failed to program the HPET to act as a LAPIC Timer proxy.
1199  *	LAPIC TIMER	: enabled
1200  *	HPET		: not scheduled to wake this CPU
1201  *	*lapic_expire	: (hrtime_t)HPET_INFINITY
1202  *	Returns		: B_FALSE
1203  *
1204  * The idle thread cannot enter Deep C-State in case 3.
1205  * The idle thread must re-enable & re-program the LAPIC_TIMER in case 2.
1206  */
1207 static boolean_t
1208 hpet_use_hpet_timer(hrtime_t *lapic_expire)
1209 {
1210 	extern hrtime_t	apic_timer_stop_count(void);
1211 	extern void	apic_timer_restart(hrtime_t);
1212 	hrtime_t	now, expire, dead;
1213 	uint64_t	lapic_count, dead_count;
1214 	cpupart_t	*cpu_part;
1215 	processorid_t	cpu_sid;
1216 	processorid_t	cpu_id = CPU->cpu_id;
1217 	processorid_t	id;
1218 	boolean_t	rslt;
1219 	boolean_t	hset_update;
1220 
1221 	cpu_part = CPU->cpu_part;
1222 	cpu_sid = CPU->cpu_seqid;
1223 
1224 	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1225 
1226 	/*
1227 	 * A critical section exists between when the HPET is programmed
1228 	 * to interrupt the CPU and when this CPU enters an idle state.
1229 	 * Interrupts must be blocked during that time to prevent lost
1230 	 * CBE wakeup interrupts from either LAPIC or HPET.
1231 	 *
1232 	 * Must block interrupts before acquiring hpet_proxy_lock to prevent
1233 	 * a deadlock with the ISR if the ISR runs on this CPU after the
1234 	 * idle thread acquires the mutex but before it clears interrupts.
1235 	 */
1236 	ASSERT(!interrupts_enabled());
1237 	lapic_count = apic_timer_stop_count();
1238 	now = gethrtime();
1239 	dead = now + hpet_idle_spin_timeout;
1240 	*lapic_expire = expire = now + lapic_count;
1241 	if (lapic_count == (hrtime_t)-1) {
1242 		/*
1243 		 * LAPIC timer is currently disabled.
1244 		 * Will not use the HPET as a LAPIC Timer proxy.
1245 		 */
1246 		*lapic_expire = (hrtime_t)HPET_INFINITY;
1247 		return (B_TRUE);
1248 	}
1249 
1250 	/*
1251 	 * Serialize hpet_proxy data structure manipulation.
1252 	 */
1253 	dead_count = 0;
1254 	while (!mutex_tryenter(&hpet_proxy_lock)) {
1255 		/*
1256 		 * spin
1257 		 */
1258 		apic_timer_restart(expire);
1259 		sti();
1260 		cli();
1261 
1262 		if (dead_count++ > hpet_spin_check) {
1263 			dead_count = 0;
1264 			hset_update = (((CPU->cpu_flags & CPU_OFFLINE) == 0) &&
1265 			    (ncpus > 1));
1266 			if (hset_update &&
1267 			    !bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
1268 				*lapic_expire = (hrtime_t)HPET_INFINITY;
1269 				return (B_FALSE);
1270 			}
1271 		}
1272 
1273 		lapic_count = apic_timer_stop_count();
1274 		now = gethrtime();
1275 		*lapic_expire = expire = now + lapic_count;
1276 		if (lapic_count == (hrtime_t)-1) {
1277 			/*
1278 			 * LAPIC timer is currently disabled.
1279 			 * Will not use the HPET as a LAPIC Timer proxy.
1280 			 */
1281 			*lapic_expire = (hrtime_t)HPET_INFINITY;
1282 			return (B_TRUE);
1283 		}
1284 		if (now > dead) {
1285 			apic_timer_restart(expire);
1286 			*lapic_expire = (hrtime_t)HPET_INFINITY;
1287 			return (B_FALSE);
1288 		}
1289 	}
1290 
1291 	if ((hpet_state.cpr == B_TRUE) ||
1292 	    (hpet_state.cpu_deep_idle == B_FALSE) ||
1293 	    (hpet_state.proxy_installed == B_FALSE) ||
1294 	    (hpet_state.uni_cstate == B_TRUE)) {
1295 		mutex_exit(&hpet_proxy_lock);
1296 		apic_timer_restart(expire);
1297 		*lapic_expire = (hrtime_t)HPET_INFINITY;
1298 		return (B_FALSE);
1299 	}
1300 
1301 	hpet_proxy_users[cpu_id] = expire;
1302 
1303 	/*
1304 	 * We are done if another cpu is scheduled on the HPET with an
1305 	 * expire time before us.  The next HPET interrupt has been programmed
1306 	 * to fire before our expire time.
1307 	 */
1308 	for (id = 0; id < max_ncpus; ++id) {
1309 		if ((hpet_proxy_users[id] <= expire) && (id != cpu_id)) {
1310 			mutex_exit(&hpet_proxy_lock);
1311 			return (B_TRUE);
1312 		}
1313 	}
1314 
1315 	/*
1316 	 * We are the next lAPIC to expire.
1317 	 * Program the HPET with our expire time.
1318 	 */
1319 	rslt = hpet_guaranteed_schedule(expire);
1320 	mutex_exit(&hpet_proxy_lock);
1321 
1322 	if (rslt == B_FALSE) {
1323 		apic_timer_restart(expire);
1324 		*lapic_expire = (hrtime_t)HPET_INFINITY;
1325 	}
1326 
1327 	return (rslt);
1328 }
1329 
1330 /*
1331  * Called by the idle thread when waking up from Deep C-state before enabling
1332  * interrupts.  With an array data structure it is faster to always remove
1333  * ourselves from the array without checking whether the HPET ISR already did.
1334  *
1335  * We use a lazy algorithm for removing CPUs from the HPET's schedule.
1336  * We do not reprogram the HPET here because this CPU has real work to do.
1337  * On an idle system the CPU was probably woken up by the HPET's ISR.
1338  * On a heavily loaded system CPUs are not going into Deep C-state.
1339  * On a moderately loaded system another CPU will usually enter Deep C-state
1340  * and reprogram the HPET before the HPET fires with our wakeup.
1341  */
1342 static void
1343 hpet_use_lapic_timer(hrtime_t expire)
1344 {
1345 	extern void	apic_timer_restart(hrtime_t);
1346 	processorid_t	cpu_id = CPU->cpu_id;
1347 
1348 	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1349 	ASSERT(!interrupts_enabled());
1350 
1351 	hpet_proxy_users[cpu_id] = HPET_INFINITY;
1352 
1353 	/*
1354 	 * Do not enable a LAPIC Timer that was initially disabled.
1355 	 */
1356 	if (expire != HPET_INFINITY)
1357 		apic_timer_restart(expire);
1358 }
1359 
1360 /*
1361  * Initialize data structure to keep track of CPUs using HPET as a proxy for
1362  * their stalled local APIC timer.  For now this is just an array.
1363  */
1364 static void
1365 hpet_init_proxy_data(void)
1366 {
1367 	processorid_t	id;
1368 
1369 	/*
1370 	 * Use max_ncpus for hot plug compliance.
1371 	 */
1372 	hpet_proxy_users = kmem_zalloc(max_ncpus * sizeof (*hpet_proxy_users),
1373 	    KM_SLEEP);
1374 
1375 	/*
1376 	 * Unused entries always contain HPET_INFINITY.
1377 	 */
1378 	for (id = 0; id < max_ncpus; ++id)
1379 		hpet_proxy_users[id] = HPET_INFINITY;
1380 }
1381