xref: /titanic_41/usr/src/uts/i86pc/io/hpet_acpi.c (revision 3fc1e17e160b171792527e6238216e3a602e8f8b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/hpet_acpi.h>
26 #include <sys/hpet.h>
27 #include <sys/bitmap.h>
28 #include <sys/inttypes.h>
29 #include <sys/time.h>
30 #include <sys/sunddi.h>
31 #include <sys/ksynch.h>
32 #include <sys/apic.h>
33 #include <sys/callb.h>
34 #include <sys/clock.h>
35 #include <sys/archsystm.h>
36 #include <sys/cpupart.h>
37 
38 static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags);
39 static boolean_t hpet_install_proxy(void);
40 static boolean_t hpet_callback(int code);
41 static boolean_t hpet_cpr(int code);
42 static boolean_t hpet_resume(void);
43 static void hpet_cst_callback(uint32_t code);
44 static boolean_t hpet_deep_idle_config(int code);
45 static int hpet_validate_table(ACPI_TABLE_HPET *hpet_table);
46 static boolean_t hpet_checksum_table(unsigned char *table, unsigned int len);
47 static void *hpet_memory_map(ACPI_TABLE_HPET *hpet_table);
48 static int hpet_start_main_counter(hpet_info_t *hip);
49 static int hpet_stop_main_counter(hpet_info_t *hip);
50 static uint64_t hpet_read_main_counter_value(hpet_info_t *hip);
51 static uint64_t hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value);
52 static uint64_t hpet_read_gen_cap(hpet_info_t *hip);
53 static uint64_t hpet_read_gen_config(hpet_info_t *hip);
54 static uint64_t hpet_read_gen_intrpt_stat(hpet_info_t *hip);
55 static uint64_t hpet_read_timer_N_config(hpet_info_t *hip, uint_t n);
56 static hpet_TN_conf_cap_t hpet_convert_timer_N_config(uint64_t conf);
57 /* LINTED E_STATIC_UNUSED */
58 static uint64_t hpet_read_timer_N_comp(hpet_info_t *hip, uint_t n);
59 /* LINTED E_STATIC_UNUSED */
60 static void hpet_write_gen_cap(hpet_info_t *hip, uint64_t l);
61 static void hpet_write_gen_config(hpet_info_t *hip, uint64_t l);
62 static void hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l);
63 static void hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l);
64 static void hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l);
65 static void hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n);
66 static void hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n);
67 /* LINTED E_STATIC_UNUSED */
68 static void hpet_write_main_counter_value(hpet_info_t *hip, uint64_t l);
69 static int hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip);
70 static int hpet_timer_available(uint32_t allocated_timers, uint32_t n);
71 static void hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n);
72 static void hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n,
73     uint32_t interrupt);
74 static uint_t hpet_isr(char *arg);
75 static uint32_t hpet_install_interrupt_handler(uint_t (*func)(char *),
76     int vector);
77 static void hpet_uninstall_interrupt_handler(void);
78 static void hpet_expire_all(void);
79 static boolean_t hpet_guaranteed_schedule(hrtime_t required_wakeup_time);
80 static boolean_t hpet_use_hpet_timer(hrtime_t *expire);
81 static void hpet_use_lapic_timer(hrtime_t expire);
82 static void hpet_init_proxy_data(void);
83 
84 /*
85  * hpet_state_lock is used to synchronize disabling/enabling deep c-states
86  * and to synchronize suspend/resume.
87  */
88 static kmutex_t		hpet_state_lock;
89 static struct hpet_state {
90 	boolean_t	proxy_installed;	/* CBE proxy interrupt setup */
91 	boolean_t	cpr;			/* currently in CPR */
92 	boolean_t	cpu_deep_idle;		/* user enable/disable */
93 	boolean_t	uni_cstate;		/* disable if only one cstate */
94 } hpet_state = { B_FALSE, B_FALSE, B_TRUE, B_TRUE};
95 
96 uint64_t hpet_spin_check = HPET_SPIN_CHECK;
97 uint64_t hpet_spin_timeout = HPET_SPIN_TIMEOUT;
98 uint64_t hpet_idle_spin_timeout = HPET_SPIN_TIMEOUT;
99 uint64_t hpet_isr_spin_timeout = HPET_SPIN_TIMEOUT;
100 
101 static kmutex_t		hpet_proxy_lock;	/* lock for lAPIC proxy data */
102 /*
103  * hpet_proxy_users is a per-cpu array.
104  */
105 static hpet_proxy_t	*hpet_proxy_users;	/* one per CPU */
106 
107 
108 ACPI_TABLE_HPET		*hpet_table;		/* ACPI HPET table */
109 hpet_info_t		hpet_info;		/* Human readable Information */
110 
111 /*
112  * Provide HPET access from unix.so.
113  * Set up pointers to access symbols in pcplusmp.
114  */
115 static void
116 hpet_establish_hooks(void)
117 {
118 	hpet.install_proxy = &hpet_install_proxy;
119 	hpet.callback = &hpet_callback;
120 	hpet.use_hpet_timer = &hpet_use_hpet_timer;
121 	hpet.use_lapic_timer = &hpet_use_lapic_timer;
122 }
123 
124 /*
125  * Get the ACPI "HPET" table.
126  * acpi_probe() calls this function from mp_startup before drivers are loaded.
127  * acpi_probe() verified the system is using ACPI before calling this.
128  *
129  * There may be more than one ACPI HPET table (Itanium only?).
130  * Intel's HPET spec defines each timer block to have up to 32 counters and
131  * be 1024 bytes long.  There can be more than one timer block of 32 counters.
132  * Each timer block would have an additional ACPI HPET table.
133  * Typical x86 systems today only have 1 HPET with 3 counters.
134  * On x86 we only consume HPET table "1" for now.
135  */
136 int
137 hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
138 {
139 	extern hrtime_t tsc_read(void);
140 	extern int	idle_cpu_no_deep_c;
141 	extern int	cpuid_deep_cstates_supported(void);
142 	void		*la;
143 	uint64_t	ret;
144 	uint_t		num_timers;
145 	uint_t		ti;
146 
147 	(void) memset(&hpet_info, 0, sizeof (hpet_info));
148 	hpet.supported = HPET_NO_SUPPORT;
149 
150 	if (idle_cpu_no_deep_c)
151 		return (DDI_FAILURE);
152 
153 	if (!cpuid_deep_cstates_supported())
154 		return (DDI_FAILURE);
155 
156 	hpet_establish_hooks();
157 
158 	/*
159 	 * Get HPET ACPI table 1.
160 	 */
161 	if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1,
162 	    (ACPI_TABLE_HEADER **)&hpet_table))) {
163 		cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table");
164 		return (DDI_FAILURE);
165 	}
166 
167 	if (hpet_validate_table(hpet_table) != AE_OK) {
168 		cmn_err(CE_NOTE, "!hpet_acpi: invalid HPET table");
169 		return (DDI_FAILURE);
170 	}
171 
172 	la = hpet_memory_map(hpet_table);
173 	if (la == NULL) {
174 		cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed");
175 		return (DDI_FAILURE);
176 	}
177 	hpet_info.logical_address = la;
178 
179 	ret = hpet_read_gen_cap(&hpet_info);
180 	hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret);
181 	hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret);
182 	hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret);
183 	hpet_info.gen_cap.count_size_cap = HPET_GCAP_CNT_SIZE_CAP(ret);
184 	/*
185 	 * Hardware contains the last timer's number.
186 	 * Add 1 to get the number of timers.
187 	 */
188 	hpet_info.gen_cap.num_tim_cap = HPET_GCAP_NUM_TIM_CAP(ret) + 1;
189 	hpet_info.gen_cap.rev_id = HPET_GCAP_REV_ID(ret);
190 
191 	if (hpet_info.gen_cap.counter_clk_period > HPET_MAX_CLK_PERIOD) {
192 		cmn_err(CE_NOTE, "!hpet_acpi: COUNTER_CLK_PERIOD 0x%lx > 0x%lx",
193 		    (long)hpet_info.gen_cap.counter_clk_period,
194 		    (long)HPET_MAX_CLK_PERIOD);
195 		return (DDI_FAILURE);
196 	}
197 
198 	num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap;
199 	if ((num_timers < 3) || (num_timers > 32)) {
200 		cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers "
201 		    "%lx", (long)num_timers);
202 		return (DDI_FAILURE);
203 	}
204 	hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc(
205 	    num_timers * sizeof (uint64_t), KM_SLEEP);
206 
207 	ret = hpet_read_gen_config(&hpet_info);
208 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
209 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
210 
211 	/*
212 	 * Solaris does not use the HPET Legacy Replacement Route capabilities.
213 	 * This feature has been off by default on test systems.
214 	 * The HPET spec does not specify if Legacy Replacement Route is
215 	 * on or off by default, so we explicitely set it off here.
216 	 * It should not matter which mode the HPET is in since we use
217 	 * the first available non-legacy replacement timer: timer 2.
218 	 */
219 	(void) hpet_set_leg_rt_cnf(&hpet_info, 0);
220 
221 	ret = hpet_read_gen_config(&hpet_info);
222 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
223 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
224 
225 	hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info);
226 	hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info);
227 
228 	for (ti = 0; ti < num_timers; ++ti) {
229 		ret = hpet_read_timer_N_config(&hpet_info, ti);
230 		/*
231 		 * Make sure no timers are enabled (think fast reboot or
232 		 * virtual hardware).
233 		 */
234 		if (ret & HPET_TIMER_N_INT_ENB_CNF_BIT) {
235 			hpet_disable_timer(&hpet_info, ti);
236 			ret &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
237 		}
238 
239 		hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret);
240 	}
241 
242 	/*
243 	 * Be aware the Main Counter may need to be initialized in the future
244 	 * if it is used for more than just Deep C-State support.
245 	 * The HPET's Main Counter does not need to be initialize to a specific
246 	 * value before starting it for use to wake up CPUs from Deep C-States.
247 	 */
248 	if (hpet_start_main_counter(&hpet_info) != AE_OK) {
249 		cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed");
250 		return (DDI_FAILURE);
251 	}
252 
253 	hpet_info.period = hpet_info.gen_cap.counter_clk_period;
254 	/*
255 	 * Read main counter twice to record HPET latency for debugging.
256 	 */
257 	hpet_info.tsc[0] = tsc_read();
258 	hpet_info.hpet_main_counter_reads[0] =
259 	    hpet_read_main_counter_value(&hpet_info);
260 	hpet_info.tsc[1] = tsc_read();
261 	hpet_info.hpet_main_counter_reads[1] =
262 	    hpet_read_main_counter_value(&hpet_info);
263 	hpet_info.tsc[2] = tsc_read();
264 
265 	ret = hpet_read_gen_config(&hpet_info);
266 	hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
267 	hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
268 
269 	/*
270 	 * HPET main counter reads are supported now.
271 	 */
272 	hpet.supported = HPET_TIMER_SUPPORT;
273 
274 	return (hpet_init_proxy(hpet_vect, hpet_flags));
275 }
276 
277 void
278 hpet_acpi_fini(void)
279 {
280 	if (hpet.supported == HPET_NO_SUPPORT)
281 		return;
282 	if (hpet.supported >= HPET_TIMER_SUPPORT)
283 		(void) hpet_stop_main_counter(&hpet_info);
284 	if (hpet.supported > HPET_TIMER_SUPPORT)
285 		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
286 }
287 
288 /*
289  * Do initial setup to use a HPET timer as a proxy for Deep C-state stalled
290  * LAPIC Timers.  Get a free HPET timer that supports I/O APIC routed interrupt.
291  * Setup data to handle the timer's ISR, and add the timer's interrupt.
292  *
293  * The ddi cannot be use to allocate the HPET timer's interrupt.
294  * ioapic_init_intr() in mp_platform_common() later sets up the I/O APIC
295  * to handle the HPET timer's interrupt.
296  *
297  * Note: FSB (MSI) interrupts are not currently supported by Intel HPETs as of
298  * ICH9.  The HPET spec allows for MSI.  In the future MSI may be prefered.
299  */
300 static int
301 hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
302 {
303 	if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) {
304 		cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed.");
305 		return (DDI_FAILURE);
306 	}
307 
308 	hpet_init_proxy_data();
309 
310 	if (hpet_install_interrupt_handler(&hpet_isr,
311 	    hpet_info.cstate_timer.intr) != AE_OK) {
312 		cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed.");
313 		return (DDI_FAILURE);
314 	}
315 	*hpet_vect = hpet_info.cstate_timer.intr;
316 	hpet_flags->intr_el = INTR_EL_LEVEL;
317 	hpet_flags->intr_po = INTR_PO_ACTIVE_HIGH;
318 	hpet_flags->bustype = BUS_PCI;		/*  we *do* conform to PCI */
319 
320 	/*
321 	 * Avoid a possibly stuck interrupt by programing the HPET's timer here
322 	 * before the I/O APIC is programmed to handle this interrupt.
323 	 */
324 	hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
325 	    hpet_info.cstate_timer.intr);
326 
327 	/*
328 	 * All HPET functionality is supported.
329 	 */
330 	hpet.supported = HPET_FULL_SUPPORT;
331 	return (DDI_SUCCESS);
332 }
333 
334 /*
335  * Called by kernel if it can support Deep C-States.
336  */
337 static boolean_t
338 hpet_install_proxy(void)
339 {
340 	if (hpet_state.proxy_installed == B_TRUE)
341 		return (B_TRUE);
342 
343 	if (hpet.supported != HPET_FULL_SUPPORT)
344 		return (B_FALSE);
345 
346 	hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
347 	hpet_state.proxy_installed = B_TRUE;
348 
349 	return (B_TRUE);
350 }
351 
352 /*
353  * Remove the interrupt that was added with add_avintr() in
354  * hpet_install_interrupt_handler().
355  */
356 static void
357 hpet_uninstall_interrupt_handler(void)
358 {
359 	rem_avintr(NULL, CBE_HIGH_PIL, (avfunc)&hpet_isr,
360 	    hpet_info.cstate_timer.intr);
361 }
362 
363 static int
364 hpet_validate_table(ACPI_TABLE_HPET *hpet_table)
365 {
366 	ACPI_TABLE_HEADER	*table_header = (ACPI_TABLE_HEADER *)hpet_table;
367 
368 	if (table_header->Length != sizeof (ACPI_TABLE_HPET)) {
369 		cmn_err(CE_WARN, "!hpet_validate_table: Length %lx != sizeof ("
370 		    "ACPI_TABLE_HPET) %lx.",
371 		    (unsigned long)((ACPI_TABLE_HEADER *)hpet_table)->Length,
372 		    (unsigned long)sizeof (ACPI_TABLE_HPET));
373 		return (AE_ERROR);
374 	}
375 
376 	if (!ACPI_COMPARE_NAME(table_header->Signature, ACPI_SIG_HPET)) {
377 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET table "
378 		    "signature");
379 		return (AE_ERROR);
380 	}
381 
382 	if (!hpet_checksum_table((unsigned char *)hpet_table,
383 	    (unsigned int)table_header->Length)) {
384 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET checksum");
385 		return (AE_ERROR);
386 	}
387 
388 	/*
389 	 * Sequence should be table number - 1.  We are using table 1.
390 	 */
391 	if (hpet_table->Sequence != HPET_TABLE_1 - 1) {
392 		cmn_err(CE_WARN, "!hpet_validate_table: Invalid Sequence %lx",
393 		    (long)hpet_table->Sequence);
394 		return (AE_ERROR);
395 	}
396 
397 	return (AE_OK);
398 }
399 
400 static boolean_t
401 hpet_checksum_table(unsigned char *table, unsigned int length)
402 {
403 	unsigned char	checksum = 0;
404 	int		i;
405 
406 	for (i = 0; i < length; ++i, ++table)
407 		checksum += *table;
408 
409 	return (checksum == 0);
410 }
411 
412 static void *
413 hpet_memory_map(ACPI_TABLE_HPET *hpet_table)
414 {
415 	return (AcpiOsMapMemory(hpet_table->Address.Address, HPET_SIZE));
416 }
417 
418 static int
419 hpet_start_main_counter(hpet_info_t *hip)
420 {
421 	uint64_t	*gcr_ptr;
422 	uint64_t	gcr;
423 
424 	gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
425 	gcr = *gcr_ptr;
426 
427 	gcr |= HPET_GCFR_ENABLE_CNF;
428 	*gcr_ptr = gcr;
429 	gcr = *gcr_ptr;
430 
431 	return (gcr & HPET_GCFR_ENABLE_CNF ? AE_OK : ~AE_OK);
432 }
433 
434 static int
435 hpet_stop_main_counter(hpet_info_t *hip)
436 {
437 	uint64_t	*gcr_ptr;
438 	uint64_t	gcr;
439 
440 	gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
441 	gcr = *gcr_ptr;
442 
443 	gcr &= ~HPET_GCFR_ENABLE_CNF;
444 	*gcr_ptr = gcr;
445 	gcr = *gcr_ptr;
446 
447 	return (gcr & HPET_GCFR_ENABLE_CNF ? ~AE_OK : AE_OK);
448 }
449 
450 /*
451  * Set the Legacy Replacement Route bit.
452  * This should be called before setting up timers.
453  * The HPET specification is silent regarding setting this after timers are
454  * programmed.
455  */
456 static uint64_t
457 hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value)
458 {
459 	uint64_t gen_conf = hpet_read_gen_config(hip);
460 
461 	switch (new_value) {
462 	case 0:
463 		gen_conf &= ~HPET_GCFR_LEG_RT_CNF;
464 		break;
465 
466 	case HPET_GCFR_LEG_RT_CNF:
467 		gen_conf |= HPET_GCFR_LEG_RT_CNF;
468 		break;
469 
470 	default:
471 		ASSERT(new_value == 0 || new_value == HPET_GCFR_LEG_RT_CNF);
472 		break;
473 	}
474 	hpet_write_gen_config(hip, gen_conf);
475 	return (gen_conf);
476 }
477 
478 static uint64_t
479 hpet_read_gen_cap(hpet_info_t *hip)
480 {
481 	return (*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address));
482 }
483 
484 static uint64_t
485 hpet_read_gen_config(hpet_info_t *hip)
486 {
487 	return (*(uint64_t *)
488 	    HPET_GEN_CONFIG_ADDRESS(hip->logical_address));
489 }
490 
491 static uint64_t
492 hpet_read_gen_intrpt_stat(hpet_info_t *hip)
493 {
494 	hip->gen_intrpt_stat = *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(
495 	    hip->logical_address);
496 	return (hip->gen_intrpt_stat);
497 }
498 
499 static uint64_t
500 hpet_read_timer_N_config(hpet_info_t *hip, uint_t n)
501 {
502 	uint64_t conf = *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
503 	    hip->logical_address, n);
504 	hip->timer_n_config[n] = hpet_convert_timer_N_config(conf);
505 	return (conf);
506 }
507 
508 static hpet_TN_conf_cap_t
509 hpet_convert_timer_N_config(uint64_t conf)
510 {
511 	hpet_TN_conf_cap_t cc = { 0 };
512 
513 	cc.int_route_cap = HPET_TIMER_N_INT_ROUTE_CAP(conf);
514 	cc.fsb_int_del_cap = HPET_TIMER_N_FSB_INT_DEL_CAP(conf);
515 	cc.fsb_int_en_cnf = HPET_TIMER_N_FSB_EN_CNF(conf);
516 	cc.int_route_cnf = HPET_TIMER_N_INT_ROUTE_CNF(conf);
517 	cc.mode32_cnf = HPET_TIMER_N_MODE32_CNF(conf);
518 	cc.val_set_cnf = HPET_TIMER_N_VAL_SET_CNF(conf);
519 	cc.size_cap = HPET_TIMER_N_SIZE_CAP(conf);
520 	cc.per_int_cap = HPET_TIMER_N_PER_INT_CAP(conf);
521 	cc.type_cnf = HPET_TIMER_N_TYPE_CNF(conf);
522 	cc.int_enb_cnf = HPET_TIMER_N_INT_ENB_CNF(conf);
523 	cc.int_type_cnf = HPET_TIMER_N_INT_TYPE_CNF(conf);
524 
525 	return (cc);
526 }
527 
528 static uint64_t
529 hpet_read_timer_N_comp(hpet_info_t *hip, uint_t n)
530 {
531 	if (hip->timer_n_config[n].size_cap == 1)
532 		return (*(uint64_t *)
533 		    HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n));
534 	else
535 		return (*(uint32_t *)
536 		    HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n));
537 }
538 
539 static uint64_t
540 hpet_read_main_counter_value(hpet_info_t *hip)
541 {
542 	uint64_t	value;
543 	uint32_t	*counter;
544 	uint32_t	high1, high2, low;
545 
546 	counter = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address);
547 
548 	/*
549 	 * 32-bit main counters
550 	 */
551 	if (hip->gen_cap.count_size_cap == 0) {
552 		value = (uint64_t)*counter;
553 		hip->main_counter_value = value;
554 		return (value);
555 	}
556 
557 	/*
558 	 * HPET spec claims a 64-bit read can be split into two 32-bit reads
559 	 * by the hardware connection to the HPET.
560 	 */
561 	high2 = counter[1];
562 	do {
563 		high1 = high2;
564 		low = counter[0];
565 		high2 = counter[1];
566 	} while (high2 != high1);
567 
568 	value = ((uint64_t)high1 << 32) | low;
569 	hip->main_counter_value = value;
570 	return (value);
571 }
572 
573 static void
574 hpet_write_gen_cap(hpet_info_t *hip, uint64_t l)
575 {
576 	*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address) = l;
577 }
578 
579 static void
580 hpet_write_gen_config(hpet_info_t *hip, uint64_t l)
581 {
582 	*(uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address) = l;
583 }
584 
585 static void
586 hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l)
587 {
588 	*(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(hip->logical_address) = l;
589 }
590 
591 static void
592 hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l)
593 {
594 	if (hip->timer_n_config[n].size_cap == 1)
595 		*(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
596 		    hip->logical_address, n) = l;
597 	else
598 		*(uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
599 		    hip->logical_address, n) = (uint32_t)(0xFFFFFFFF & l);
600 }
601 
602 static void
603 hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l)
604 {
605 	*(uint64_t *)HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n) = l;
606 }
607 
608 static void
609 hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n)
610 {
611 	uint64_t l;
612 
613 	l = hpet_read_timer_N_config(hip, timer_n);
614 	l &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
615 	hpet_write_timer_N_config(hip, timer_n, l);
616 }
617 
618 static void
619 hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n)
620 {
621 	uint64_t l;
622 
623 	l = hpet_read_timer_N_config(hip, timer_n);
624 	l |= HPET_TIMER_N_INT_ENB_CNF_BIT;
625 	hpet_write_timer_N_config(hip, timer_n, l);
626 }
627 
628 static void
629 hpet_write_main_counter_value(hpet_info_t *hip, uint64_t l)
630 {
631 	uint32_t	*address;
632 
633 	/*
634 	 * HPET spec 1.0a states main counter register should be halted before
635 	 * it is written to.
636 	 */
637 	ASSERT(!(hpet_read_gen_config(hip) & HPET_GCFR_ENABLE_CNF));
638 
639 	if (hip->gen_cap.count_size_cap == 1) {
640 		*(uint64_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address)
641 		    = l;
642 	} else {
643 		address = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(
644 		    hip->logical_address);
645 
646 		address[0] = (uint32_t)(l & 0xFFFFFFFF);
647 	}
648 }
649 
650 /*
651  * Add the interrupt handler for I/O APIC interrupt number (interrupt line).
652  *
653  * The I/O APIC line (vector) is programmed in ioapic_init_intr() called
654  * from apic_picinit() psm_ops apic_ops entry point after we return from
655  * apic_init() psm_ops entry point.
656  */
657 static uint32_t
658 hpet_install_interrupt_handler(uint_t (*func)(char *), int vector)
659 {
660 	uint32_t retval;
661 
662 	retval = add_avintr(NULL, CBE_HIGH_PIL, (avfunc)func, "HPET Timer",
663 	    vector, NULL, NULL, NULL, NULL);
664 	if (retval == 0) {
665 		cmn_err(CE_WARN, "!hpet_acpi: add_avintr() failed");
666 		return (AE_BAD_PARAMETER);
667 	}
668 	return (AE_OK);
669 }
670 
671 /*
672  * The HPET timers specify which I/O APIC interrupts they can be routed to.
673  * Find the first available non-legacy-replacement timer and its I/O APIC irq.
674  * Supported I/O APIC IRQs are specified in the int_route_cap bitmap in each
675  * timer's timer_n_config register.
676  */
677 static int
678 hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip)
679 {
680 	int	timer;
681 	int	intr;
682 
683 	for (timer = HPET_FIRST_NON_LEGACY_TIMER;
684 	    timer < hip->gen_cap.num_tim_cap; ++timer) {
685 
686 		if (!hpet_timer_available(hip->allocated_timers, timer))
687 			continue;
688 
689 		intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1;
690 		if (intr >= 0) {
691 			hpet_timer_alloc(&hip->allocated_timers, timer);
692 			hip->cstate_timer.timer = timer;
693 			hip->cstate_timer.intr = intr;
694 			return (timer);
695 		}
696 	}
697 
698 	return (-1);
699 }
700 
/*
 * Mark timer n as used by setting bit n in *allocated_timers.
 * n must be in the range 0..31.  The shift is done on an unsigned constant
 * so that n == 31 is well defined (a signed 1 << 31 overflows int).
 */
static void
hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n)
{
	*allocated_timers |= (uint32_t)1 << n;
}
709 
/*
 * Check if timer n is available: returns non-zero when bit n of
 * allocated_timers is clear, and 0 when it is set.
 * n must be in the range 0..31.  The shift is done on an unsigned constant
 * so that n == 31 is well defined (a signed 1 << 31 overflows int).
 * No mutual exclusion because only one thread uses this.
 */
static int
hpet_timer_available(uint32_t allocated_timers, uint32_t n)
{
	return ((allocated_timers & ((uint32_t)1 << n)) == 0);
}
719 
720 /*
721  * Setup timer N to route its interrupt to I/O APIC.
722  */
723 static void
724 hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
725 {
726 	uint64_t conf;
727 
728 	conf = hpet_read_timer_N_config(hip, timer_n);
729 
730 	/*
731 	 * Caller is required to verify this interrupt route is supported.
732 	 */
733 	ASSERT(HPET_TIMER_N_INT_ROUTE_CAP(conf) & (1 << interrupt));
734 
735 	conf &= ~HPET_TIMER_N_FSB_EN_CNF_BIT;	/* use IOAPIC */
736 	conf |= HPET_TIMER_N_INT_ROUTE_SHIFT(interrupt);
737 	conf &= ~HPET_TIMER_N_TYPE_CNF_BIT;	/* non periodic */
738 	conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;	/* disabled */
739 	conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT;	/* Level Triggered */
740 
741 	hpet_write_timer_N_config(hip, timer_n, conf);
742 }
743 
744 /*
745  * The HPET's Main Counter is not stopped before programming an HPET timer.
746  * This will allow the HPET to be used as a time source.
747  * The programmed timer interrupt may occur before this function returns.
748  * Callers must block interrupts before calling this function if they must
749  * guarantee the interrupt is handled after this function returns.
750  *
751  * Return 0 if main counter is less than timer after enabling timer.
752  * The interrupt was programmed, but it may fire before this returns.
753  * Return !0 if main counter is greater than timer after enabling timer.
754  * In other words: the timer will not fire, and we do not know if it did fire.
755  *
756  * delta is in HPET ticks.
757  *
758  * Writing a 64-bit value to a 32-bit register will "wrap around".
759  * A 32-bit HPET timer will wrap around in a little over 5 minutes.
760  */
761 int
762 hpet_timer_program(hpet_info_t *hip, uint32_t timer, uint64_t delta)
763 {
764 	uint64_t time, program;
765 
766 	program = hpet_read_main_counter_value(hip);
767 	program += delta;
768 	hpet_write_timer_N_comp(hip, timer, program);
769 
770 	time = hpet_read_main_counter_value(hip);
771 	if (time < program)
772 		return (AE_OK);
773 
774 	return (AE_TIME);
775 }
776 
777 /*
778  * CPR and power policy-change callback entry point.
779  */
780 boolean_t
781 hpet_callback(int code)
782 {
783 	switch (code) {
784 	case PM_DEFAULT_CPU_DEEP_IDLE:
785 		/*FALLTHROUGH*/
786 	case PM_ENABLE_CPU_DEEP_IDLE:
787 		/*FALLTHROUGH*/
788 	case PM_DISABLE_CPU_DEEP_IDLE:
789 		return (hpet_deep_idle_config(code));
790 
791 	case CB_CODE_CPR_RESUME:
792 		/*FALLTHROUGH*/
793 	case CB_CODE_CPR_CHKPT:
794 		return (hpet_cpr(code));
795 
796 	case CST_EVENT_MULTIPLE_CSTATES:
797 		hpet_cst_callback(CST_EVENT_MULTIPLE_CSTATES);
798 		return (B_TRUE);
799 
800 	case CST_EVENT_ONE_CSTATE:
801 		hpet_cst_callback(CST_EVENT_ONE_CSTATE);
802 		return (B_TRUE);
803 
804 	default:
805 		cmn_err(CE_NOTE, "!hpet_callback: invalid code %d\n", code);
806 		return (B_FALSE);
807 	}
808 }
809 
810 /*
811  * According to the HPET spec 1.0a: the Operating System must save and restore
812  * HPET event timer hardware context through ACPI sleep state transitions.
813  * Timer registers (including the main counter) may not be preserved through
814  * ACPI S3, S4, or S5 sleep states.  This code does not not support S1 nor S2.
815  *
816  * Current HPET state is already in hpet.supported and
817  * hpet_state.proxy_installed.  hpet_info contains the proxy interrupt HPET
818  * Timer state.
819  *
820  * Future projects beware: the HPET Main Counter is undefined after ACPI S3 or
821  * S4, and it is not saved/restored here.  Future projects cannot expect the
822  * Main Counter to be monotomically (or accurately) increasing across CPR.
823  *
824  * Note: the CPR Checkpoint path later calls pause_cpus() which ensures all
825  * CPUs are awake and in a spin loop before the system suspends.  The HPET is
826  * not needed for Deep C-state wakeup when CPUs are in cpu_pause().
827  * It is safe to leave the HPET running as the system suspends; we just
828  * disable the timer from generating interrupts here.
829  */
830 static boolean_t
831 hpet_cpr(int code)
832 {
833 	ulong_t		intr, dead_count = 0;
834 	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
835 	boolean_t	ret = B_TRUE;
836 
837 	mutex_enter(&hpet_state_lock);
838 	switch (code) {
839 	case CB_CODE_CPR_CHKPT:
840 		if (hpet_state.proxy_installed == B_FALSE)
841 			break;
842 
843 		hpet_state.cpr = B_TRUE;
844 
845 		intr = intr_clear();
846 		while (!mutex_tryenter(&hpet_proxy_lock)) {
847 			/*
848 			 * spin
849 			 */
850 			intr_restore(intr);
851 			if (dead_count++ > hpet_spin_check) {
852 				dead_count = 0;
853 				if (gethrtime() > dead) {
854 					hpet_state.cpr = B_FALSE;
855 					mutex_exit(&hpet_state_lock);
856 					cmn_err(CE_NOTE, "!hpet_cpr: deadman");
857 					return (B_FALSE);
858 				}
859 			}
860 			intr = intr_clear();
861 		}
862 		hpet_expire_all();
863 		mutex_exit(&hpet_proxy_lock);
864 		intr_restore(intr);
865 
866 		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
867 		break;
868 
869 	case CB_CODE_CPR_RESUME:
870 		if (hpet_resume() == B_TRUE)
871 			hpet_state.cpr = B_FALSE;
872 		else
873 			cmn_err(CE_NOTE, "!hpet_resume failed.");
874 		break;
875 
876 	default:
877 		cmn_err(CE_NOTE, "!hpet_cpr: invalid code %d\n", code);
878 		ret = B_FALSE;
879 		break;
880 	}
881 	mutex_exit(&hpet_state_lock);
882 	return (ret);
883 }
884 
885 /*
886  * Assume the HPET stopped in Suspend state and timer state was lost.
887  */
888 static boolean_t
889 hpet_resume(void)
890 {
891 	if (hpet.supported != HPET_TIMER_SUPPORT)
892 		return (B_TRUE);
893 
894 	/*
895 	 * The HPET spec does not specify if Legacy Replacement Route is
896 	 * on or off by default, so we set it off here.
897 	 */
898 	(void) hpet_set_leg_rt_cnf(&hpet_info, 0);
899 
900 	if (hpet_start_main_counter(&hpet_info) != AE_OK) {
901 		cmn_err(CE_NOTE, "!hpet_resume: start main counter failed");
902 		hpet.supported = HPET_NO_SUPPORT;
903 		if (hpet_state.proxy_installed == B_TRUE) {
904 			hpet_state.proxy_installed = B_FALSE;
905 			hpet_uninstall_interrupt_handler();
906 		}
907 		return (B_FALSE);
908 	}
909 
910 	if (hpet_state.proxy_installed == B_FALSE)
911 		return (B_TRUE);
912 
913 	hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
914 	    hpet_info.cstate_timer.intr);
915 	if (hpet_state.cpu_deep_idle == B_TRUE)
916 		hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
917 
918 	return (B_TRUE);
919 }
920 
921 /*
922  * Callback to enable/disable Deep C-States based on power.conf setting.
923  */
924 static boolean_t
925 hpet_deep_idle_config(int code)
926 {
927 	ulong_t		intr, dead_count = 0;
928 	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
929 	boolean_t	ret = B_TRUE;
930 
931 	mutex_enter(&hpet_state_lock);
932 	switch (code) {
933 	case PM_DEFAULT_CPU_DEEP_IDLE:
934 		/*FALLTHROUGH*/
935 	case PM_ENABLE_CPU_DEEP_IDLE:
936 
937 		if (hpet_state.cpu_deep_idle == B_TRUE)
938 			break;
939 
940 		if (hpet_state.proxy_installed == B_FALSE) {
941 			ret = B_FALSE;  /* Deep C-States not supported */
942 			break;
943 		}
944 
945 		hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
946 		hpet_state.cpu_deep_idle = B_TRUE;
947 		break;
948 
949 	case PM_DISABLE_CPU_DEEP_IDLE:
950 
951 		if ((hpet_state.cpu_deep_idle == B_FALSE) ||
952 		    (hpet_state.proxy_installed == B_FALSE))
953 			break;
954 
955 		/*
956 		 * The order of these operations is important to avoid
957 		 * lost wakeups: Set a flag to refuse all future LAPIC Timer
958 		 * proxy requests, then wake up all CPUs from deep C-state,
959 		 * and finally disable the HPET interrupt-generating timer.
960 		 */
961 		hpet_state.cpu_deep_idle = B_FALSE;
962 
963 		intr = intr_clear();
964 		while (!mutex_tryenter(&hpet_proxy_lock)) {
965 			/*
966 			 * spin
967 			 */
968 			intr_restore(intr);
969 			if (dead_count++ > hpet_spin_check) {
970 				dead_count = 0;
971 				if (gethrtime() > dead) {
972 					hpet_state.cpu_deep_idle = B_TRUE;
973 					mutex_exit(&hpet_state_lock);
974 					cmn_err(CE_NOTE,
975 					    "!hpet_deep_idle_config: deadman");
976 					return (B_FALSE);
977 				}
978 			}
979 			intr = intr_clear();
980 		}
981 		hpet_expire_all();
982 		mutex_exit(&hpet_proxy_lock);
983 		intr_restore(intr);
984 
985 		hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
986 		break;
987 
988 	default:
989 		cmn_err(CE_NOTE, "!hpet_deep_idle_config: invalid code %d\n",
990 		    code);
991 		ret = B_FALSE;
992 		break;
993 	}
994 	mutex_exit(&hpet_state_lock);
995 
996 	return (ret);
997 }
998 
999 /*
1000  * Callback for _CST c-state change notifications.
1001  */
1002 static void
1003 hpet_cst_callback(uint32_t code)
1004 {
1005 	ulong_t		intr, dead_count = 0;
1006 	hrtime_t	dead = gethrtime() + hpet_spin_timeout;
1007 
1008 	switch (code) {
1009 	case CST_EVENT_ONE_CSTATE:
1010 		hpet_state.uni_cstate = B_TRUE;
1011 		intr = intr_clear();
1012 		while (!mutex_tryenter(&hpet_proxy_lock)) {
1013 			/*
1014 			 * spin
1015 			 */
1016 			intr_restore(intr);
1017 			if (dead_count++ > hpet_spin_check) {
1018 				dead_count = 0;
1019 				if (gethrtime() > dead) {
1020 					hpet_expire_all();
1021 					cmn_err(CE_NOTE,
1022 					    "!hpet_cst_callback: deadman");
1023 					return;
1024 				}
1025 			}
1026 			intr = intr_clear();
1027 		}
1028 		hpet_expire_all();
1029 		mutex_exit(&hpet_proxy_lock);
1030 		intr_restore(intr);
1031 		break;
1032 
1033 	case CST_EVENT_MULTIPLE_CSTATES:
1034 		hpet_state.uni_cstate = B_FALSE;
1035 		break;
1036 
1037 	default:
1038 		cmn_err(CE_NOTE, "!hpet_cst_callback: invalid code %d\n", code);
1039 		break;
1040 	}
1041 }
1042 
1043 /*
1044  * Interrupt Service Routine for HPET I/O-APIC-generated interrupts.
1045  * Used to wakeup CPUs from Deep C-state when their Local APIC Timer stops.
1046  * This ISR runs on one CPU which pokes other CPUs out of Deep C-state as
1047  * needed.
1048  */
1049 /* ARGSUSED */
1050 static uint_t
1051 hpet_isr(char *arg)
1052 {
1053 	uint64_t	timer_status;
1054 	uint64_t	timer_mask;
1055 	ulong_t		intr, dead_count = 0;
1056 	hrtime_t	dead = gethrtime() + hpet_isr_spin_timeout;
1057 
1058 	timer_mask = HPET_INTR_STATUS_MASK(hpet_info.cstate_timer.timer);
1059 
1060 	/*
1061 	 * We are using a level-triggered interrupt.
1062 	 * HPET sets timer's General Interrupt Status Register bit N.
1063 	 * ISR checks this bit to see if it needs servicing.
1064 	 * ISR then clears this bit by writing 1 to that bit.
1065 	 */
1066 	timer_status = hpet_read_gen_intrpt_stat(&hpet_info);
1067 	if (!(timer_status & timer_mask))
1068 		return (DDI_INTR_UNCLAIMED);
1069 	hpet_write_gen_intrpt_stat(&hpet_info, timer_mask);
1070 
1071 	/*
1072 	 * Do not touch ISR data structures before checking the HPET's General
1073 	 * Interrupt Status register.  The General Interrupt Status register
1074 	 * will not be set by hardware until after timer interrupt generation
1075 	 * is enabled by software.  Software allocates necessary data
1076 	 * structures before enabling timer interrupts.  ASSERT the software
1077 	 * data structures required to handle this interrupt are initialized.
1078 	 */
1079 	ASSERT(hpet_proxy_users != NULL);
1080 
1081 	/*
1082 	 * CPUs in deep c-states do not enable interrupts until after
1083 	 * performing idle cleanup which includes descheduling themselves from
1084 	 * the HPET.  The CPU running this ISR will NEVER find itself in the
1085 	 * proxy list.  A lost wakeup may occur if this is false.
1086 	 */
1087 	ASSERT(hpet_proxy_users[CPU->cpu_id] == HPET_INFINITY);
1088 
1089 	/*
1090 	 * Higher level interrupts may deadlock with CPUs going idle if this
1091 	 * ISR is prempted while holding hpet_proxy_lock.
1092 	 */
1093 	intr = intr_clear();
1094 	while (!mutex_tryenter(&hpet_proxy_lock)) {
1095 		/*
1096 		 * spin
1097 		 */
1098 		intr_restore(intr);
1099 		if (dead_count++ > hpet_spin_check) {
1100 			dead_count = 0;
1101 			if (gethrtime() > dead) {
1102 				hpet_expire_all();
1103 				return (DDI_INTR_CLAIMED);
1104 			}
1105 		}
1106 		intr = intr_clear();
1107 	}
1108 	(void) hpet_guaranteed_schedule(HPET_INFINITY);
1109 	mutex_exit(&hpet_proxy_lock);
1110 	intr_restore(intr);
1111 
1112 	return (DDI_INTR_CLAIMED);
1113 }
1114 
1115 /*
1116  * Used when disabling the HPET Timer interrupt.  CPUs in Deep C-state must be
1117  * woken up because they can no longer rely on the HPET's Timer to wake them.
1118  * We do not need to wait for CPUs to wakeup.
1119  */
1120 static void
1121 hpet_expire_all(void)
1122 {
1123 	processorid_t	id;
1124 
1125 	for (id = 0; id < max_ncpus; ++id) {
1126 		if (hpet_proxy_users[id] != HPET_INFINITY) {
1127 			hpet_proxy_users[id] = HPET_INFINITY;
1128 			if (id != CPU->cpu_id)
1129 				poke_cpu(id);
1130 		}
1131 	}
1132 }
1133 
1134 /*
1135  * To avoid missed wakeups this function must guarantee either the HPET timer
1136  * was successfully programmed to the next expire time or there are no waiting
1137  * CPUs.
1138  *
1139  * Callers cannot enter C2 or deeper if the HPET could not be programmed to
1140  * generate its next interrupt to happen at required_wakeup_time or sooner.
1141  * Returns B_TRUE if the HPET was programmed to interrupt by
1142  * required_wakeup_time, B_FALSE if not.
1143  */
1144 static boolean_t
1145 hpet_guaranteed_schedule(hrtime_t required_wakeup_time)
1146 {
1147 	hrtime_t	now, next_proxy_time;
1148 	processorid_t	id, next_proxy_id;
1149 	int		proxy_timer = hpet_info.cstate_timer.timer;
1150 	boolean_t	done = B_FALSE;
1151 
1152 	ASSERT(mutex_owned(&hpet_proxy_lock));
1153 
1154 	/*
1155 	 * Loop until we successfully program the HPET,
1156 	 * or no CPUs are scheduled to use the HPET as a proxy.
1157 	 */
1158 	do {
1159 		/*
1160 		 * Wake all CPUs that expired before now.
1161 		 * Find the next CPU to wake up and next HPET program time.
1162 		 */
1163 		now = gethrtime();
1164 		next_proxy_time = HPET_INFINITY;
1165 		next_proxy_id = CPU->cpu_id;
1166 		for (id = 0; id < max_ncpus; ++id) {
1167 			if (hpet_proxy_users[id] < now) {
1168 				hpet_proxy_users[id] = HPET_INFINITY;
1169 				if (id != CPU->cpu_id)
1170 					poke_cpu(id);
1171 			} else if (hpet_proxy_users[id] < next_proxy_time) {
1172 				next_proxy_time = hpet_proxy_users[id];
1173 				next_proxy_id = id;
1174 			}
1175 		}
1176 
1177 		if (next_proxy_time == HPET_INFINITY) {
1178 			done = B_TRUE;
1179 			/*
1180 			 * There are currently no CPUs using the HPET's Timer
1181 			 * as a proxy for their LAPIC Timer.  The HPET's Timer
1182 			 * does not need to be programmed.
1183 			 *
1184 			 * Letting the HPET timer wrap around to the current
1185 			 * time is the longest possible timeout.
1186 			 * A 64-bit timer will wrap around in ~ 2^44 seconds.
1187 			 * A 32-bit timer will wrap around in ~ 2^12 seconds.
1188 			 *
1189 			 * Disabling the HPET's timer interrupt requires a
1190 			 * (relatively expensive) write to the HPET.
1191 			 * Instead we do nothing.
1192 			 *
1193 			 * We are gambling some CPU will attempt to enter a
1194 			 * deep c-state before the timer wraps around.
1195 			 * We assume one spurious interrupt in a little over an
1196 			 * hour has less performance impact than writing to the
1197 			 * HPET's timer disable bit every time all CPUs wakeup
1198 			 * from deep c-state.
1199 			 */
1200 
1201 		} else {
1202 			/*
1203 			 * Idle CPUs disable interrupts before programming the
1204 			 * HPET to prevent a lost wakeup if the HPET
1205 			 * interrupts the idle cpu before it can enter a
1206 			 * Deep C-State.
1207 			 */
1208 			if (hpet_timer_program(&hpet_info, proxy_timer,
1209 			    HRTIME_TO_HPET_TICKS(next_proxy_time - gethrtime()))
1210 			    != AE_OK) {
1211 				/*
1212 				 * We could not program the HPET to wakeup the
1213 				 * next CPU.  We must wake the CPU ourself to
1214 				 * avoid a lost wakeup.
1215 				 */
1216 				hpet_proxy_users[next_proxy_id] = HPET_INFINITY;
1217 				if (next_proxy_id != CPU->cpu_id)
1218 					poke_cpu(next_proxy_id);
1219 			} else {
1220 				done = B_TRUE;
1221 			}
1222 		}
1223 
1224 	} while (!done);
1225 
1226 	return (next_proxy_time <= required_wakeup_time);
1227 }
1228 
1229 /*
1230  * Use an HPET timer to act as this CPU's proxy local APIC timer.
1231  * Used in deep c-states C2 and above while the CPU's local APIC timer stalls.
1232  * Called by the idle thread with interrupts enabled.
1233  * Always returns with interrupts disabled.
1234  *
1235  * There are 3 possible outcomes from this function:
1236  * 1. The Local APIC Timer was already disabled before this function was called.
1237  *	LAPIC TIMER	: disabled
1238  *	HPET		: not scheduled to wake this CPU
1239  *	*lapic_expire	: (hrtime_t)HPET_INFINITY
1240  *	Returns		: B_TRUE
1241  * 2. Successfully programmed the HPET to act as a LAPIC Timer proxy.
1242  *	LAPIC TIMER	: disabled
1243  *	HPET		: scheduled to wake this CPU
1244  *	*lapic_expire	: hrtime_t when LAPIC timer would have expired
1245  *	Returns		: B_TRUE
1246  * 3. Failed to programmed the HPET to act as a LAPIC Timer proxy.
1247  *	LAPIC TIMER	: enabled
1248  *	HPET		: not scheduled to wake this CPU
1249  *	*lapic_expire	: (hrtime_t)HPET_INFINITY
1250  *	Returns		: B_FALSE
1251  *
1252  * The idle thread cannot enter Deep C-State in case 3.
1253  * The idle thread must re-enable & re-program the LAPIC_TIMER in case 2.
1254  */
1255 static boolean_t
1256 hpet_use_hpet_timer(hrtime_t *lapic_expire)
1257 {
1258 	extern hrtime_t	apic_timer_stop_count(void);
1259 	extern void	apic_timer_restart(hrtime_t);
1260 	hrtime_t	now, expire, dead;
1261 	uint64_t	lapic_count, dead_count;
1262 	cpupart_t	*cpu_part;
1263 	processorid_t	cpu_sid;
1264 	processorid_t	cpu_id = CPU->cpu_id;
1265 	processorid_t	id;
1266 	boolean_t	rslt;
1267 	boolean_t	hset_update;
1268 
1269 	cpu_part = CPU->cpu_part;
1270 	cpu_sid = CPU->cpu_seqid;
1271 
1272 	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1273 
1274 	/*
1275 	 * A critical section exists between when the HPET is programmed
1276 	 * to interrupt the CPU and when this CPU enters an idle state.
1277 	 * Interrupts must be blocked during that time to prevent lost
1278 	 * CBE wakeup interrupts from either LAPIC or HPET.
1279 	 *
1280 	 * Must block interrupts before acquiring hpet_proxy_lock to prevent
1281 	 * a deadlock with the ISR if the ISR runs on this CPU after the
1282 	 * idle thread acquires the mutex but before it clears interrupts.
1283 	 */
1284 	ASSERT(!interrupts_enabled());
1285 	lapic_count = apic_timer_stop_count();
1286 	now = gethrtime();
1287 	dead = now + hpet_idle_spin_timeout;
1288 	*lapic_expire = expire = now + lapic_count;
1289 	if (lapic_count == (hrtime_t)-1) {
1290 		/*
1291 		 * LAPIC timer is currently disabled.
1292 		 * Will not use the HPET as a LAPIC Timer proxy.
1293 		 */
1294 		*lapic_expire = (hrtime_t)HPET_INFINITY;
1295 		return (B_TRUE);
1296 	}
1297 
1298 	/*
1299 	 * Serialize hpet_proxy data structure manipulation.
1300 	 */
1301 	dead_count = 0;
1302 	while (!mutex_tryenter(&hpet_proxy_lock)) {
1303 		/*
1304 		 * spin
1305 		 */
1306 		apic_timer_restart(expire);
1307 		sti();
1308 		cli();
1309 
1310 		if (dead_count++ > hpet_spin_check) {
1311 			dead_count = 0;
1312 			hset_update = (((CPU->cpu_flags & CPU_OFFLINE) == 0) &&
1313 			    (ncpus > 1));
1314 			if (hset_update &&
1315 			    !bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
1316 				*lapic_expire = (hrtime_t)HPET_INFINITY;
1317 				return (B_FALSE);
1318 			}
1319 		}
1320 
1321 		lapic_count = apic_timer_stop_count();
1322 		now = gethrtime();
1323 		*lapic_expire = expire = now + lapic_count;
1324 		if (lapic_count == (hrtime_t)-1) {
1325 			/*
1326 			 * LAPIC timer is currently disabled.
1327 			 * Will not use the HPET as a LAPIC Timer proxy.
1328 			 */
1329 			*lapic_expire = (hrtime_t)HPET_INFINITY;
1330 			return (B_TRUE);
1331 		}
1332 		if (now > dead) {
1333 			apic_timer_restart(expire);
1334 			*lapic_expire = (hrtime_t)HPET_INFINITY;
1335 			return (B_FALSE);
1336 		}
1337 	}
1338 
1339 	if ((hpet_state.cpr == B_TRUE) ||
1340 	    (hpet_state.cpu_deep_idle == B_FALSE) ||
1341 	    (hpet_state.proxy_installed == B_FALSE) ||
1342 	    (hpet_state.uni_cstate == B_TRUE)) {
1343 		mutex_exit(&hpet_proxy_lock);
1344 		apic_timer_restart(expire);
1345 		*lapic_expire = (hrtime_t)HPET_INFINITY;
1346 		return (B_FALSE);
1347 	}
1348 
1349 	hpet_proxy_users[cpu_id] = expire;
1350 
1351 	/*
1352 	 * We are done if another cpu is scheduled on the HPET with an
1353 	 * expire time before us.  The next HPET interrupt has been programmed
1354 	 * to fire before our expire time.
1355 	 */
1356 	for (id = 0; id < max_ncpus; ++id) {
1357 		if ((hpet_proxy_users[id] <= expire) && (id != cpu_id)) {
1358 			mutex_exit(&hpet_proxy_lock);
1359 			return (B_TRUE);
1360 		}
1361 	}
1362 
1363 	/*
1364 	 * We are the next lAPIC to expire.
1365 	 * Program the HPET with our expire time.
1366 	 */
1367 	rslt = hpet_guaranteed_schedule(expire);
1368 	mutex_exit(&hpet_proxy_lock);
1369 
1370 	if (rslt == B_FALSE) {
1371 		apic_timer_restart(expire);
1372 		*lapic_expire = (hrtime_t)HPET_INFINITY;
1373 	}
1374 
1375 	return (rslt);
1376 }
1377 
1378 /*
1379  * Called by the idle thread when waking up from Deep C-state before enabling
1380  * interrupts.  With an array data structure it is faster to always remove
1381  * ourself from the array without checking if the HPET ISR already removed.
1382  *
1383  * We use a lazy algorithm for removing CPUs from the HPET's schedule.
1384  * We do not reprogram the HPET here because this CPU has real work to do.
1385  * On a idle system the CPU was probably woken up by the HPET's ISR.
1386  * On a heavily loaded system CPUs are not going into Deep C-state.
1387  * On a moderately loaded system another CPU will usually enter Deep C-state
1388  * and reprogram the HPET before the HPET fires with our wakeup.
1389  */
1390 static void
1391 hpet_use_lapic_timer(hrtime_t expire)
1392 {
1393 	extern void	apic_timer_restart(hrtime_t);
1394 	processorid_t	cpu_id = CPU->cpu_id;
1395 
1396 	ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1397 	ASSERT(!interrupts_enabled());
1398 
1399 	hpet_proxy_users[cpu_id] = HPET_INFINITY;
1400 
1401 	/*
1402 	 * Do not enable a LAPIC Timer that was initially disabled.
1403 	 */
1404 	if (expire != HPET_INFINITY)
1405 		apic_timer_restart(expire);
1406 }
1407 
1408 /*
1409  * Initialize data structure to keep track of CPUs using HPET as a proxy for
1410  * their stalled local APIC timer.  For now this is just an array.
1411  */
1412 static void
1413 hpet_init_proxy_data(void)
1414 {
1415 	processorid_t	id;
1416 
1417 	/*
1418 	 * Use max_ncpus for hot plug compliance.
1419 	 */
1420 	hpet_proxy_users = kmem_zalloc(max_ncpus * sizeof (*hpet_proxy_users),
1421 	    KM_SLEEP);
1422 
1423 	/*
1424 	 * Unused entries always contain HPET_INFINITY.
1425 	 */
1426 	for (id = 0; id < max_ncpus; ++id)
1427 		hpet_proxy_users[id] = HPET_INFINITY;
1428 }
1429