1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/hpet_acpi.h>
26 #include <sys/hpet.h>
27 #include <sys/bitmap.h>
28 #include <sys/inttypes.h>
29 #include <sys/time.h>
30 #include <sys/sunddi.h>
31 #include <sys/ksynch.h>
32 #include <sys/apic.h>
33 #include <sys/callb.h>
34 #include <sys/clock.h>
35 #include <sys/archsystm.h>
36 #include <sys/cpupart.h>
37
38 static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags);
39 static boolean_t hpet_install_proxy(void);
40 static boolean_t hpet_callback(int code);
41 static boolean_t hpet_cpr(int code);
42 static boolean_t hpet_resume(void);
43 static void hpet_cst_callback(uint32_t code);
44 static boolean_t hpet_deep_idle_config(int code);
45 static int hpet_validate_table(ACPI_TABLE_HPET *hpet_table);
46 static boolean_t hpet_checksum_table(unsigned char *table, unsigned int len);
47 static void *hpet_memory_map(ACPI_TABLE_HPET *hpet_table);
48 static int hpet_start_main_counter(hpet_info_t *hip);
49 static int hpet_stop_main_counter(hpet_info_t *hip);
50 static uint64_t hpet_read_main_counter_value(hpet_info_t *hip);
51 static uint64_t hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value);
52 static uint64_t hpet_read_gen_cap(hpet_info_t *hip);
53 static uint64_t hpet_read_gen_config(hpet_info_t *hip);
54 static uint64_t hpet_read_gen_intrpt_stat(hpet_info_t *hip);
55 static uint64_t hpet_read_timer_N_config(hpet_info_t *hip, uint_t n);
56 static hpet_TN_conf_cap_t hpet_convert_timer_N_config(uint64_t conf);
57 static void hpet_write_gen_config(hpet_info_t *hip, uint64_t l);
58 static void hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l);
59 static void hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l);
60 static void hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l);
61 static void hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n);
62 static void hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n);
63 static int hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip);
64 static int hpet_timer_available(uint32_t allocated_timers, uint32_t n);
65 static void hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n);
66 static void hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n,
67 uint32_t interrupt);
68 static uint_t hpet_isr(char *arg);
69 static uint32_t hpet_install_interrupt_handler(uint_t (*func)(char *),
70 int vector);
71 static void hpet_uninstall_interrupt_handler(void);
72 static void hpet_expire_all(void);
73 static boolean_t hpet_guaranteed_schedule(hrtime_t required_wakeup_time);
74 static boolean_t hpet_use_hpet_timer(hrtime_t *expire);
75 static void hpet_use_lapic_timer(hrtime_t expire);
76 static void hpet_init_proxy_data(void);
77
78 /*
79 * hpet_state_lock is used to synchronize disabling/enabling deep c-states
80 * and to synchronize suspend/resume.
81 */
82 static kmutex_t hpet_state_lock;
83 static struct hpet_state {
84 boolean_t proxy_installed; /* CBE proxy interrupt setup */
85 boolean_t cpr; /* currently in CPR */
86 boolean_t cpu_deep_idle; /* user enable/disable */
87 boolean_t uni_cstate; /* disable if only one cstate */
88 } hpet_state = { B_FALSE, B_FALSE, B_TRUE, B_TRUE};
89
90 uint64_t hpet_spin_check = HPET_SPIN_CHECK;
91 uint64_t hpet_spin_timeout = HPET_SPIN_TIMEOUT;
92 uint64_t hpet_idle_spin_timeout = HPET_SPIN_TIMEOUT;
93 uint64_t hpet_isr_spin_timeout = HPET_SPIN_TIMEOUT;
94
95 static kmutex_t hpet_proxy_lock; /* lock for lAPIC proxy data */
96 /*
97 * hpet_proxy_users is a per-cpu array.
98 */
99 static hpet_proxy_t *hpet_proxy_users; /* one per CPU */
100
101
102 ACPI_TABLE_HPET *hpet_table; /* ACPI HPET table */
103 hpet_info_t hpet_info; /* Human readable Information */
104
105 /*
106 * Provide HPET access from unix.so.
107 * Set up pointers to access symbols in pcplusmp.
108 */
109 static void
hpet_establish_hooks(void)110 hpet_establish_hooks(void)
111 {
112 hpet.install_proxy = &hpet_install_proxy;
113 hpet.callback = &hpet_callback;
114 hpet.use_hpet_timer = &hpet_use_hpet_timer;
115 hpet.use_lapic_timer = &hpet_use_lapic_timer;
116 }
117
118 /*
119 * Get the ACPI "HPET" table.
120 * acpi_probe() calls this function from mp_startup before drivers are loaded.
121 * acpi_probe() verified the system is using ACPI before calling this.
122 *
123 * There may be more than one ACPI HPET table (Itanium only?).
124 * Intel's HPET spec defines each timer block to have up to 32 counters and
125 * be 1024 bytes long. There can be more than one timer block of 32 counters.
126 * Each timer block would have an additional ACPI HPET table.
127 * Typical x86 systems today only have 1 HPET with 3 counters.
128 * On x86 we only consume HPET table "1" for now.
129 */
130 int
hpet_acpi_init(int * hpet_vect,iflag_t * hpet_flags)131 hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
132 {
133 extern hrtime_t tsc_read(void);
134 extern int idle_cpu_no_deep_c;
135 extern int cpuid_deep_cstates_supported(void);
136 void *la;
137 uint64_t ret;
138 uint_t num_timers;
139 uint_t ti;
140
141 (void) memset(&hpet_info, 0, sizeof (hpet_info));
142 hpet.supported = HPET_NO_SUPPORT;
143
144 if (idle_cpu_no_deep_c)
145 return (DDI_FAILURE);
146
147 if (!cpuid_deep_cstates_supported())
148 return (DDI_FAILURE);
149
150 hpet_establish_hooks();
151
152 /*
153 * Get HPET ACPI table 1.
154 */
155 if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1,
156 (ACPI_TABLE_HEADER **)&hpet_table))) {
157 cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table");
158 return (DDI_FAILURE);
159 }
160
161 if (hpet_validate_table(hpet_table) != AE_OK) {
162 cmn_err(CE_NOTE, "!hpet_acpi: invalid HPET table");
163 return (DDI_FAILURE);
164 }
165
166 la = hpet_memory_map(hpet_table);
167 if (la == NULL) {
168 cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed");
169 return (DDI_FAILURE);
170 }
171 hpet_info.logical_address = la;
172
173 ret = hpet_read_gen_cap(&hpet_info);
174 hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret);
175 hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret);
176 hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret);
177 hpet_info.gen_cap.count_size_cap = HPET_GCAP_CNT_SIZE_CAP(ret);
178 /*
179 * Hardware contains the last timer's number.
180 * Add 1 to get the number of timers.
181 */
182 hpet_info.gen_cap.num_tim_cap = HPET_GCAP_NUM_TIM_CAP(ret) + 1;
183 hpet_info.gen_cap.rev_id = HPET_GCAP_REV_ID(ret);
184
185 if (hpet_info.gen_cap.counter_clk_period > HPET_MAX_CLK_PERIOD) {
186 cmn_err(CE_NOTE, "!hpet_acpi: COUNTER_CLK_PERIOD 0x%lx > 0x%lx",
187 (long)hpet_info.gen_cap.counter_clk_period,
188 (long)HPET_MAX_CLK_PERIOD);
189 return (DDI_FAILURE);
190 }
191
192 num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap;
193 if ((num_timers < 3) || (num_timers > 32)) {
194 cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers "
195 "%lx", (long)num_timers);
196 return (DDI_FAILURE);
197 }
198 hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc(
199 num_timers * sizeof (uint64_t), KM_SLEEP);
200
201 ret = hpet_read_gen_config(&hpet_info);
202 hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
203 hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
204
205 /*
206 * Solaris does not use the HPET Legacy Replacement Route capabilities.
207 * This feature has been off by default on test systems.
208 * The HPET spec does not specify if Legacy Replacement Route is
209 * on or off by default, so we explicitely set it off here.
210 * It should not matter which mode the HPET is in since we use
211 * the first available non-legacy replacement timer: timer 2.
212 */
213 (void) hpet_set_leg_rt_cnf(&hpet_info, 0);
214
215 ret = hpet_read_gen_config(&hpet_info);
216 hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
217 hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
218
219 hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info);
220 hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info);
221
222 for (ti = 0; ti < num_timers; ++ti) {
223 ret = hpet_read_timer_N_config(&hpet_info, ti);
224 /*
225 * Make sure no timers are enabled (think fast reboot or
226 * virtual hardware).
227 */
228 if (ret & HPET_TIMER_N_INT_ENB_CNF_BIT) {
229 hpet_disable_timer(&hpet_info, ti);
230 ret &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
231 }
232
233 hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret);
234 }
235
236 /*
237 * Be aware the Main Counter may need to be initialized in the future
238 * if it is used for more than just Deep C-State support.
239 * The HPET's Main Counter does not need to be initialize to a specific
240 * value before starting it for use to wake up CPUs from Deep C-States.
241 */
242 if (hpet_start_main_counter(&hpet_info) != AE_OK) {
243 cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed");
244 return (DDI_FAILURE);
245 }
246
247 hpet_info.period = hpet_info.gen_cap.counter_clk_period;
248 /*
249 * Read main counter twice to record HPET latency for debugging.
250 */
251 hpet_info.tsc[0] = tsc_read();
252 hpet_info.hpet_main_counter_reads[0] =
253 hpet_read_main_counter_value(&hpet_info);
254 hpet_info.tsc[1] = tsc_read();
255 hpet_info.hpet_main_counter_reads[1] =
256 hpet_read_main_counter_value(&hpet_info);
257 hpet_info.tsc[2] = tsc_read();
258
259 ret = hpet_read_gen_config(&hpet_info);
260 hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
261 hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
262
263 /*
264 * HPET main counter reads are supported now.
265 */
266 hpet.supported = HPET_TIMER_SUPPORT;
267
268 return (hpet_init_proxy(hpet_vect, hpet_flags));
269 }
270
271 void
hpet_acpi_fini(void)272 hpet_acpi_fini(void)
273 {
274 if (hpet.supported == HPET_NO_SUPPORT)
275 return;
276 if (hpet.supported >= HPET_TIMER_SUPPORT)
277 (void) hpet_stop_main_counter(&hpet_info);
278 if (hpet.supported > HPET_TIMER_SUPPORT)
279 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
280 }
281
282 /*
283 * Do initial setup to use a HPET timer as a proxy for Deep C-state stalled
284 * LAPIC Timers. Get a free HPET timer that supports I/O APIC routed interrupt.
285 * Setup data to handle the timer's ISR, and add the timer's interrupt.
286 *
287 * The ddi cannot be use to allocate the HPET timer's interrupt.
288 * ioapic_init_intr() in mp_platform_common() later sets up the I/O APIC
289 * to handle the HPET timer's interrupt.
290 *
291 * Note: FSB (MSI) interrupts are not currently supported by Intel HPETs as of
292 * ICH9. The HPET spec allows for MSI. In the future MSI may be prefered.
293 */
294 static int
hpet_init_proxy(int * hpet_vect,iflag_t * hpet_flags)295 hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
296 {
297 if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) {
298 cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed.");
299 return (DDI_FAILURE);
300 }
301
302 hpet_init_proxy_data();
303
304 if (hpet_install_interrupt_handler(&hpet_isr,
305 hpet_info.cstate_timer.intr) != AE_OK) {
306 cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed.");
307 return (DDI_FAILURE);
308 }
309 *hpet_vect = hpet_info.cstate_timer.intr;
310 hpet_flags->intr_el = INTR_EL_LEVEL;
311 hpet_flags->intr_po = INTR_PO_ACTIVE_HIGH;
312 hpet_flags->bustype = BUS_PCI; /* we *do* conform to PCI */
313
314 /*
315 * Avoid a possibly stuck interrupt by programing the HPET's timer here
316 * before the I/O APIC is programmed to handle this interrupt.
317 */
318 hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
319 hpet_info.cstate_timer.intr);
320
321 /*
322 * All HPET functionality is supported.
323 */
324 hpet.supported = HPET_FULL_SUPPORT;
325 return (DDI_SUCCESS);
326 }
327
328 /*
329 * Called by kernel if it can support Deep C-States.
330 */
331 static boolean_t
hpet_install_proxy(void)332 hpet_install_proxy(void)
333 {
334 if (hpet_state.proxy_installed == B_TRUE)
335 return (B_TRUE);
336
337 if (hpet.supported != HPET_FULL_SUPPORT)
338 return (B_FALSE);
339
340 hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
341 hpet_state.proxy_installed = B_TRUE;
342
343 return (B_TRUE);
344 }
345
346 /*
347 * Remove the interrupt that was added with add_avintr() in
348 * hpet_install_interrupt_handler().
349 */
350 static void
hpet_uninstall_interrupt_handler(void)351 hpet_uninstall_interrupt_handler(void)
352 {
353 rem_avintr(NULL, CBE_HIGH_PIL, (avfunc)&hpet_isr,
354 hpet_info.cstate_timer.intr);
355 }
356
357 static int
hpet_validate_table(ACPI_TABLE_HPET * hpet_table)358 hpet_validate_table(ACPI_TABLE_HPET *hpet_table)
359 {
360 ACPI_TABLE_HEADER *table_header = (ACPI_TABLE_HEADER *)hpet_table;
361
362 if (table_header->Length != sizeof (ACPI_TABLE_HPET)) {
363 cmn_err(CE_WARN, "!hpet_validate_table: Length %lx != sizeof ("
364 "ACPI_TABLE_HPET) %lx.",
365 (unsigned long)((ACPI_TABLE_HEADER *)hpet_table)->Length,
366 (unsigned long)sizeof (ACPI_TABLE_HPET));
367 return (AE_ERROR);
368 }
369
370 if (!ACPI_COMPARE_NAME(table_header->Signature, ACPI_SIG_HPET)) {
371 cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET table "
372 "signature");
373 return (AE_ERROR);
374 }
375
376 if (!hpet_checksum_table((unsigned char *)hpet_table,
377 (unsigned int)table_header->Length)) {
378 cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET checksum");
379 return (AE_ERROR);
380 }
381
382 /*
383 * Sequence should be table number - 1. We are using table 1.
384 */
385 if (hpet_table->Sequence != HPET_TABLE_1 - 1) {
386 cmn_err(CE_WARN, "!hpet_validate_table: Invalid Sequence %lx",
387 (long)hpet_table->Sequence);
388 return (AE_ERROR);
389 }
390
391 return (AE_OK);
392 }
393
/*
 * Verify a table checksum: the 8-bit sum of all "length" bytes starting at
 * "table" must be zero.
 *
 * Returns non-zero (true) when the bytes sum to zero modulo 256, which
 * includes the length == 0 case, and zero (false) otherwise.
 */
static boolean_t
hpet_checksum_table(unsigned char *table, unsigned int length)
{
	unsigned char	checksum = 0;
	unsigned int	i;	/* unsigned to match length's type */

	for (i = 0; i < length; ++i)
		checksum += table[i];

	return (checksum == 0);
}
405
406 static void *
hpet_memory_map(ACPI_TABLE_HPET * hpet_table)407 hpet_memory_map(ACPI_TABLE_HPET *hpet_table)
408 {
409 return (AcpiOsMapMemory(hpet_table->Address.Address, HPET_SIZE));
410 }
411
412 static int
hpet_start_main_counter(hpet_info_t * hip)413 hpet_start_main_counter(hpet_info_t *hip)
414 {
415 uint64_t *gcr_ptr;
416 uint64_t gcr;
417
418 gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
419 gcr = *gcr_ptr;
420
421 gcr |= HPET_GCFR_ENABLE_CNF;
422 *gcr_ptr = gcr;
423 gcr = *gcr_ptr;
424
425 return (gcr & HPET_GCFR_ENABLE_CNF ? AE_OK : ~AE_OK);
426 }
427
428 static int
hpet_stop_main_counter(hpet_info_t * hip)429 hpet_stop_main_counter(hpet_info_t *hip)
430 {
431 uint64_t *gcr_ptr;
432 uint64_t gcr;
433
434 gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
435 gcr = *gcr_ptr;
436
437 gcr &= ~HPET_GCFR_ENABLE_CNF;
438 *gcr_ptr = gcr;
439 gcr = *gcr_ptr;
440
441 return (gcr & HPET_GCFR_ENABLE_CNF ? ~AE_OK : AE_OK);
442 }
443
444 /*
445 * Set the Legacy Replacement Route bit.
446 * This should be called before setting up timers.
447 * The HPET specification is silent regarding setting this after timers are
448 * programmed.
449 */
450 static uint64_t
hpet_set_leg_rt_cnf(hpet_info_t * hip,uint32_t new_value)451 hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value)
452 {
453 uint64_t gen_conf = hpet_read_gen_config(hip);
454
455 switch (new_value) {
456 case 0:
457 gen_conf &= ~HPET_GCFR_LEG_RT_CNF;
458 break;
459
460 case HPET_GCFR_LEG_RT_CNF:
461 gen_conf |= HPET_GCFR_LEG_RT_CNF;
462 break;
463
464 default:
465 ASSERT(new_value == 0 || new_value == HPET_GCFR_LEG_RT_CNF);
466 break;
467 }
468 hpet_write_gen_config(hip, gen_conf);
469 return (gen_conf);
470 }
471
472 static uint64_t
hpet_read_gen_cap(hpet_info_t * hip)473 hpet_read_gen_cap(hpet_info_t *hip)
474 {
475 return (*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address));
476 }
477
478 static uint64_t
hpet_read_gen_config(hpet_info_t * hip)479 hpet_read_gen_config(hpet_info_t *hip)
480 {
481 return (*(uint64_t *)
482 HPET_GEN_CONFIG_ADDRESS(hip->logical_address));
483 }
484
485 static uint64_t
hpet_read_gen_intrpt_stat(hpet_info_t * hip)486 hpet_read_gen_intrpt_stat(hpet_info_t *hip)
487 {
488 hip->gen_intrpt_stat = *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(
489 hip->logical_address);
490 return (hip->gen_intrpt_stat);
491 }
492
493 static uint64_t
hpet_read_timer_N_config(hpet_info_t * hip,uint_t n)494 hpet_read_timer_N_config(hpet_info_t *hip, uint_t n)
495 {
496 uint64_t conf = *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
497 hip->logical_address, n);
498 hip->timer_n_config[n] = hpet_convert_timer_N_config(conf);
499 return (conf);
500 }
501
502 static hpet_TN_conf_cap_t
hpet_convert_timer_N_config(uint64_t conf)503 hpet_convert_timer_N_config(uint64_t conf)
504 {
505 hpet_TN_conf_cap_t cc = { 0 };
506
507 cc.int_route_cap = HPET_TIMER_N_INT_ROUTE_CAP(conf);
508 cc.fsb_int_del_cap = HPET_TIMER_N_FSB_INT_DEL_CAP(conf);
509 cc.fsb_int_en_cnf = HPET_TIMER_N_FSB_EN_CNF(conf);
510 cc.int_route_cnf = HPET_TIMER_N_INT_ROUTE_CNF(conf);
511 cc.mode32_cnf = HPET_TIMER_N_MODE32_CNF(conf);
512 cc.val_set_cnf = HPET_TIMER_N_VAL_SET_CNF(conf);
513 cc.size_cap = HPET_TIMER_N_SIZE_CAP(conf);
514 cc.per_int_cap = HPET_TIMER_N_PER_INT_CAP(conf);
515 cc.type_cnf = HPET_TIMER_N_TYPE_CNF(conf);
516 cc.int_enb_cnf = HPET_TIMER_N_INT_ENB_CNF(conf);
517 cc.int_type_cnf = HPET_TIMER_N_INT_TYPE_CNF(conf);
518
519 return (cc);
520 }
521
522 static uint64_t
hpet_read_main_counter_value(hpet_info_t * hip)523 hpet_read_main_counter_value(hpet_info_t *hip)
524 {
525 uint64_t value;
526 uint32_t *counter;
527 uint32_t high1, high2, low;
528
529 counter = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address);
530
531 /*
532 * 32-bit main counters
533 */
534 if (hip->gen_cap.count_size_cap == 0) {
535 value = (uint64_t)*counter;
536 hip->main_counter_value = value;
537 return (value);
538 }
539
540 /*
541 * HPET spec claims a 64-bit read can be split into two 32-bit reads
542 * by the hardware connection to the HPET.
543 */
544 high2 = counter[1];
545 do {
546 high1 = high2;
547 low = counter[0];
548 high2 = counter[1];
549 } while (high2 != high1);
550
551 value = ((uint64_t)high1 << 32) | low;
552 hip->main_counter_value = value;
553 return (value);
554 }
555
556 static void
hpet_write_gen_config(hpet_info_t * hip,uint64_t l)557 hpet_write_gen_config(hpet_info_t *hip, uint64_t l)
558 {
559 *(uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address) = l;
560 }
561
562 static void
hpet_write_gen_intrpt_stat(hpet_info_t * hip,uint64_t l)563 hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l)
564 {
565 *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(hip->logical_address) = l;
566 }
567
568 static void
hpet_write_timer_N_config(hpet_info_t * hip,uint_t n,uint64_t l)569 hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l)
570 {
571 if (hip->timer_n_config[n].size_cap == 1)
572 *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
573 hip->logical_address, n) = l;
574 else
575 *(uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
576 hip->logical_address, n) = (uint32_t)(0xFFFFFFFF & l);
577 }
578
579 static void
hpet_write_timer_N_comp(hpet_info_t * hip,uint_t n,uint64_t l)580 hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l)
581 {
582 *(uint64_t *)HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n) = l;
583 }
584
585 static void
hpet_disable_timer(hpet_info_t * hip,uint32_t timer_n)586 hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n)
587 {
588 uint64_t l;
589
590 l = hpet_read_timer_N_config(hip, timer_n);
591 l &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
592 hpet_write_timer_N_config(hip, timer_n, l);
593 }
594
595 static void
hpet_enable_timer(hpet_info_t * hip,uint32_t timer_n)596 hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n)
597 {
598 uint64_t l;
599
600 l = hpet_read_timer_N_config(hip, timer_n);
601 l |= HPET_TIMER_N_INT_ENB_CNF_BIT;
602 hpet_write_timer_N_config(hip, timer_n, l);
603 }
604
605 /*
606 * Add the interrupt handler for I/O APIC interrupt number (interrupt line).
607 *
608 * The I/O APIC line (vector) is programmed in ioapic_init_intr() called
609 * from apic_picinit() psm_ops apic_ops entry point after we return from
610 * apic_init() psm_ops entry point.
611 */
612 static uint32_t
hpet_install_interrupt_handler(uint_t (* func)(char *),int vector)613 hpet_install_interrupt_handler(uint_t (*func)(char *), int vector)
614 {
615 uint32_t retval;
616
617 retval = add_avintr(NULL, CBE_HIGH_PIL, (avfunc)func, "HPET Timer",
618 vector, NULL, NULL, NULL, NULL);
619 if (retval == 0) {
620 cmn_err(CE_WARN, "!hpet_acpi: add_avintr() failed");
621 return (AE_BAD_PARAMETER);
622 }
623 return (AE_OK);
624 }
625
626 /*
627 * The HPET timers specify which I/O APIC interrupts they can be routed to.
628 * Find the first available non-legacy-replacement timer and its I/O APIC irq.
629 * Supported I/O APIC IRQs are specified in the int_route_cap bitmap in each
630 * timer's timer_n_config register.
631 */
632 static int
hpet_get_IOAPIC_intr_capable_timer(hpet_info_t * hip)633 hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip)
634 {
635 int timer;
636 int intr;
637
638 for (timer = HPET_FIRST_NON_LEGACY_TIMER;
639 timer < hip->gen_cap.num_tim_cap; ++timer) {
640
641 if (!hpet_timer_available(hip->allocated_timers, timer))
642 continue;
643
644 intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1;
645 if (intr >= 0) {
646 hpet_timer_alloc(&hip->allocated_timers, timer);
647 hip->cstate_timer.timer = timer;
648 hip->cstate_timer.intr = intr;
649 return (timer);
650 }
651 }
652
653 return (-1);
654 }
655
/*
 * Mark this timer as used by setting bit n in *allocated_timers.
 * n must be less than 32.  The shift is done on an unsigned 32-bit one so
 * that n == 31 is well defined.
 */
static void
hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n)
{
	*allocated_timers |= (uint32_t)1 << n;
}
664
/*
 * Check if this timer is available.
 * No mutual exclusion because only one thread uses this.
 *
 * Returns 1 when bit n of allocated_timers is clear and 0 when it is set.
 * n must be less than 32.  The shift is done on an unsigned 32-bit one so
 * that n == 31 is well defined.
 */
static int
hpet_timer_available(uint32_t allocated_timers, uint32_t n)
{
	return ((allocated_timers & ((uint32_t)1 << n)) == 0);
}
674
675 /*
676 * Setup timer N to route its interrupt to I/O APIC.
677 */
678 static void
hpet_timer_set_up(hpet_info_t * hip,uint32_t timer_n,uint32_t interrupt)679 hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
680 {
681 uint64_t conf;
682
683 conf = hpet_read_timer_N_config(hip, timer_n);
684
685 /*
686 * Caller is required to verify this interrupt route is supported.
687 */
688 ASSERT(HPET_TIMER_N_INT_ROUTE_CAP(conf) & (1 << interrupt));
689
690 conf &= ~HPET_TIMER_N_FSB_EN_CNF_BIT; /* use IOAPIC */
691 conf |= HPET_TIMER_N_INT_ROUTE_SHIFT(interrupt);
692 conf &= ~HPET_TIMER_N_TYPE_CNF_BIT; /* non periodic */
693 conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT; /* disabled */
694 conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT; /* Level Triggered */
695
696 hpet_write_timer_N_config(hip, timer_n, conf);
697 }
698
699 /*
700 * The HPET's Main Counter is not stopped before programming an HPET timer.
701 * This will allow the HPET to be used as a time source.
702 * The programmed timer interrupt may occur before this function returns.
703 * Callers must block interrupts before calling this function if they must
704 * guarantee the interrupt is handled after this function returns.
705 *
706 * Return 0 if main counter is less than timer after enabling timer.
707 * The interrupt was programmed, but it may fire before this returns.
708 * Return !0 if main counter is greater than timer after enabling timer.
709 * In other words: the timer will not fire, and we do not know if it did fire.
710 *
711 * delta is in HPET ticks.
712 *
713 * Writing a 64-bit value to a 32-bit register will "wrap around".
714 * A 32-bit HPET timer will wrap around in a little over 5 minutes.
715 */
716 int
hpet_timer_program(hpet_info_t * hip,uint32_t timer,uint64_t delta)717 hpet_timer_program(hpet_info_t *hip, uint32_t timer, uint64_t delta)
718 {
719 uint64_t time, program;
720
721 program = hpet_read_main_counter_value(hip);
722 program += delta;
723 hpet_write_timer_N_comp(hip, timer, program);
724
725 time = hpet_read_main_counter_value(hip);
726 if (time < program)
727 return (AE_OK);
728
729 return (AE_TIME);
730 }
731
732 /*
733 * CPR and power policy-change callback entry point.
734 */
735 boolean_t
hpet_callback(int code)736 hpet_callback(int code)
737 {
738 switch (code) {
739 case PM_DEFAULT_CPU_DEEP_IDLE:
740 /*FALLTHROUGH*/
741 case PM_ENABLE_CPU_DEEP_IDLE:
742 /*FALLTHROUGH*/
743 case PM_DISABLE_CPU_DEEP_IDLE:
744 return (hpet_deep_idle_config(code));
745
746 case CB_CODE_CPR_RESUME:
747 /*FALLTHROUGH*/
748 case CB_CODE_CPR_CHKPT:
749 return (hpet_cpr(code));
750
751 case CST_EVENT_MULTIPLE_CSTATES:
752 hpet_cst_callback(CST_EVENT_MULTIPLE_CSTATES);
753 return (B_TRUE);
754
755 case CST_EVENT_ONE_CSTATE:
756 hpet_cst_callback(CST_EVENT_ONE_CSTATE);
757 return (B_TRUE);
758
759 default:
760 cmn_err(CE_NOTE, "!hpet_callback: invalid code %d\n", code);
761 return (B_FALSE);
762 }
763 }
764
765 /*
766 * According to the HPET spec 1.0a: the Operating System must save and restore
767 * HPET event timer hardware context through ACPI sleep state transitions.
768 * Timer registers (including the main counter) may not be preserved through
769 * ACPI S3, S4, or S5 sleep states. This code does not not support S1 nor S2.
770 *
771 * Current HPET state is already in hpet.supported and
772 * hpet_state.proxy_installed. hpet_info contains the proxy interrupt HPET
773 * Timer state.
774 *
775 * Future projects beware: the HPET Main Counter is undefined after ACPI S3 or
776 * S4, and it is not saved/restored here. Future projects cannot expect the
777 * Main Counter to be monotomically (or accurately) increasing across CPR.
778 *
779 * Note: the CPR Checkpoint path later calls pause_cpus() which ensures all
780 * CPUs are awake and in a spin loop before the system suspends. The HPET is
781 * not needed for Deep C-state wakeup when CPUs are in cpu_pause().
782 * It is safe to leave the HPET running as the system suspends; we just
783 * disable the timer from generating interrupts here.
784 */
785 static boolean_t
hpet_cpr(int code)786 hpet_cpr(int code)
787 {
788 ulong_t intr, dead_count = 0;
789 hrtime_t dead = gethrtime() + hpet_spin_timeout;
790 boolean_t ret = B_TRUE;
791
792 mutex_enter(&hpet_state_lock);
793 switch (code) {
794 case CB_CODE_CPR_CHKPT:
795 if (hpet_state.proxy_installed == B_FALSE)
796 break;
797
798 hpet_state.cpr = B_TRUE;
799
800 intr = intr_clear();
801 while (!mutex_tryenter(&hpet_proxy_lock)) {
802 /*
803 * spin
804 */
805 intr_restore(intr);
806 if (dead_count++ > hpet_spin_check) {
807 dead_count = 0;
808 if (gethrtime() > dead) {
809 hpet_state.cpr = B_FALSE;
810 mutex_exit(&hpet_state_lock);
811 cmn_err(CE_NOTE, "!hpet_cpr: deadman");
812 return (B_FALSE);
813 }
814 }
815 intr = intr_clear();
816 }
817 hpet_expire_all();
818 mutex_exit(&hpet_proxy_lock);
819 intr_restore(intr);
820
821 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
822 break;
823
824 case CB_CODE_CPR_RESUME:
825 if (hpet_resume() == B_TRUE)
826 hpet_state.cpr = B_FALSE;
827 else
828 cmn_err(CE_NOTE, "!hpet_resume failed.");
829 break;
830
831 default:
832 cmn_err(CE_NOTE, "!hpet_cpr: invalid code %d\n", code);
833 ret = B_FALSE;
834 break;
835 }
836 mutex_exit(&hpet_state_lock);
837 return (ret);
838 }
839
840 /*
841 * Assume the HPET stopped in Suspend state and timer state was lost.
842 */
843 static boolean_t
hpet_resume(void)844 hpet_resume(void)
845 {
846 if (hpet.supported != HPET_TIMER_SUPPORT)
847 return (B_TRUE);
848
849 /*
850 * The HPET spec does not specify if Legacy Replacement Route is
851 * on or off by default, so we set it off here.
852 */
853 (void) hpet_set_leg_rt_cnf(&hpet_info, 0);
854
855 if (hpet_start_main_counter(&hpet_info) != AE_OK) {
856 cmn_err(CE_NOTE, "!hpet_resume: start main counter failed");
857 hpet.supported = HPET_NO_SUPPORT;
858 if (hpet_state.proxy_installed == B_TRUE) {
859 hpet_state.proxy_installed = B_FALSE;
860 hpet_uninstall_interrupt_handler();
861 }
862 return (B_FALSE);
863 }
864
865 if (hpet_state.proxy_installed == B_FALSE)
866 return (B_TRUE);
867
868 hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
869 hpet_info.cstate_timer.intr);
870 if (hpet_state.cpu_deep_idle == B_TRUE)
871 hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
872
873 return (B_TRUE);
874 }
875
876 /*
877 * Callback to enable/disable Deep C-States based on power.conf setting.
878 */
879 static boolean_t
hpet_deep_idle_config(int code)880 hpet_deep_idle_config(int code)
881 {
882 ulong_t intr, dead_count = 0;
883 hrtime_t dead = gethrtime() + hpet_spin_timeout;
884 boolean_t ret = B_TRUE;
885
886 mutex_enter(&hpet_state_lock);
887 switch (code) {
888 case PM_DEFAULT_CPU_DEEP_IDLE:
889 /*FALLTHROUGH*/
890 case PM_ENABLE_CPU_DEEP_IDLE:
891
892 if (hpet_state.cpu_deep_idle == B_TRUE)
893 break;
894
895 if (hpet_state.proxy_installed == B_FALSE) {
896 ret = B_FALSE; /* Deep C-States not supported */
897 break;
898 }
899
900 hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
901 hpet_state.cpu_deep_idle = B_TRUE;
902 break;
903
904 case PM_DISABLE_CPU_DEEP_IDLE:
905
906 if ((hpet_state.cpu_deep_idle == B_FALSE) ||
907 (hpet_state.proxy_installed == B_FALSE))
908 break;
909
910 /*
911 * The order of these operations is important to avoid
912 * lost wakeups: Set a flag to refuse all future LAPIC Timer
913 * proxy requests, then wake up all CPUs from deep C-state,
914 * and finally disable the HPET interrupt-generating timer.
915 */
916 hpet_state.cpu_deep_idle = B_FALSE;
917
918 intr = intr_clear();
919 while (!mutex_tryenter(&hpet_proxy_lock)) {
920 /*
921 * spin
922 */
923 intr_restore(intr);
924 if (dead_count++ > hpet_spin_check) {
925 dead_count = 0;
926 if (gethrtime() > dead) {
927 hpet_state.cpu_deep_idle = B_TRUE;
928 mutex_exit(&hpet_state_lock);
929 cmn_err(CE_NOTE,
930 "!hpet_deep_idle_config: deadman");
931 return (B_FALSE);
932 }
933 }
934 intr = intr_clear();
935 }
936 hpet_expire_all();
937 mutex_exit(&hpet_proxy_lock);
938 intr_restore(intr);
939
940 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
941 break;
942
943 default:
944 cmn_err(CE_NOTE, "!hpet_deep_idle_config: invalid code %d\n",
945 code);
946 ret = B_FALSE;
947 break;
948 }
949 mutex_exit(&hpet_state_lock);
950
951 return (ret);
952 }
953
954 /*
955 * Callback for _CST c-state change notifications.
956 */
957 static void
hpet_cst_callback(uint32_t code)958 hpet_cst_callback(uint32_t code)
959 {
960 ulong_t intr, dead_count = 0;
961 hrtime_t dead = gethrtime() + hpet_spin_timeout;
962
963 switch (code) {
964 case CST_EVENT_ONE_CSTATE:
965 hpet_state.uni_cstate = B_TRUE;
966 intr = intr_clear();
967 while (!mutex_tryenter(&hpet_proxy_lock)) {
968 /*
969 * spin
970 */
971 intr_restore(intr);
972 if (dead_count++ > hpet_spin_check) {
973 dead_count = 0;
974 if (gethrtime() > dead) {
975 hpet_expire_all();
976 cmn_err(CE_NOTE,
977 "!hpet_cst_callback: deadman");
978 return;
979 }
980 }
981 intr = intr_clear();
982 }
983 hpet_expire_all();
984 mutex_exit(&hpet_proxy_lock);
985 intr_restore(intr);
986 break;
987
988 case CST_EVENT_MULTIPLE_CSTATES:
989 hpet_state.uni_cstate = B_FALSE;
990 break;
991
992 default:
993 cmn_err(CE_NOTE, "!hpet_cst_callback: invalid code %d\n", code);
994 break;
995 }
996 }
997
998 /*
999 * Interrupt Service Routine for HPET I/O-APIC-generated interrupts.
1000 * Used to wakeup CPUs from Deep C-state when their Local APIC Timer stops.
1001 * This ISR runs on one CPU which pokes other CPUs out of Deep C-state as
1002 * needed.
1003 */
1004 /* ARGSUSED */
1005 static uint_t
hpet_isr(char * arg)1006 hpet_isr(char *arg)
1007 {
1008 uint64_t timer_status;
1009 uint64_t timer_mask;
1010 ulong_t intr, dead_count = 0;
1011 hrtime_t dead = gethrtime() + hpet_isr_spin_timeout;
1012
1013 timer_mask = HPET_INTR_STATUS_MASK(hpet_info.cstate_timer.timer);
1014
1015 /*
1016 * We are using a level-triggered interrupt.
1017 * HPET sets timer's General Interrupt Status Register bit N.
1018 * ISR checks this bit to see if it needs servicing.
1019 * ISR then clears this bit by writing 1 to that bit.
1020 */
1021 timer_status = hpet_read_gen_intrpt_stat(&hpet_info);
1022 if (!(timer_status & timer_mask))
1023 return (DDI_INTR_UNCLAIMED);
1024 hpet_write_gen_intrpt_stat(&hpet_info, timer_mask);
1025
1026 /*
1027 * Do not touch ISR data structures before checking the HPET's General
1028 * Interrupt Status register. The General Interrupt Status register
1029 * will not be set by hardware until after timer interrupt generation
1030 * is enabled by software. Software allocates necessary data
1031 * structures before enabling timer interrupts. ASSERT the software
1032 * data structures required to handle this interrupt are initialized.
1033 */
1034 ASSERT(hpet_proxy_users != NULL);
1035
1036 /*
1037 * CPUs in deep c-states do not enable interrupts until after
1038 * performing idle cleanup which includes descheduling themselves from
1039 * the HPET. The CPU running this ISR will NEVER find itself in the
1040 * proxy list. A lost wakeup may occur if this is false.
1041 */
1042 ASSERT(hpet_proxy_users[CPU->cpu_id] == HPET_INFINITY);
1043
1044 /*
1045 * Higher level interrupts may deadlock with CPUs going idle if this
1046 * ISR is prempted while holding hpet_proxy_lock.
1047 */
1048 intr = intr_clear();
1049 while (!mutex_tryenter(&hpet_proxy_lock)) {
1050 /*
1051 * spin
1052 */
1053 intr_restore(intr);
1054 if (dead_count++ > hpet_spin_check) {
1055 dead_count = 0;
1056 if (gethrtime() > dead) {
1057 hpet_expire_all();
1058 return (DDI_INTR_CLAIMED);
1059 }
1060 }
1061 intr = intr_clear();
1062 }
1063 (void) hpet_guaranteed_schedule(HPET_INFINITY);
1064 mutex_exit(&hpet_proxy_lock);
1065 intr_restore(intr);
1066
1067 return (DDI_INTR_CLAIMED);
1068 }
1069
1070 /*
1071 * Used when disabling the HPET Timer interrupt. CPUs in Deep C-state must be
1072 * woken up because they can no longer rely on the HPET's Timer to wake them.
1073 * We do not need to wait for CPUs to wakeup.
1074 */
1075 static void
hpet_expire_all(void)1076 hpet_expire_all(void)
1077 {
1078 processorid_t id;
1079
1080 for (id = 0; id < max_ncpus; ++id) {
1081 if (hpet_proxy_users[id] != HPET_INFINITY) {
1082 hpet_proxy_users[id] = HPET_INFINITY;
1083 if (id != CPU->cpu_id)
1084 poke_cpu(id);
1085 }
1086 }
1087 }
1088
1089 /*
1090 * To avoid missed wakeups this function must guarantee either the HPET timer
1091 * was successfully programmed to the next expire time or there are no waiting
1092 * CPUs.
1093 *
1094 * Callers cannot enter C2 or deeper if the HPET could not be programmed to
1095 * generate its next interrupt to happen at required_wakeup_time or sooner.
1096 * Returns B_TRUE if the HPET was programmed to interrupt by
1097 * required_wakeup_time, B_FALSE if not.
1098 */
1099 static boolean_t
hpet_guaranteed_schedule(hrtime_t required_wakeup_time)1100 hpet_guaranteed_schedule(hrtime_t required_wakeup_time)
1101 {
1102 hrtime_t now, next_proxy_time;
1103 processorid_t id, next_proxy_id;
1104 int proxy_timer = hpet_info.cstate_timer.timer;
1105 boolean_t done = B_FALSE;
1106
1107 ASSERT(mutex_owned(&hpet_proxy_lock));
1108
1109 /*
1110 * Loop until we successfully program the HPET,
1111 * or no CPUs are scheduled to use the HPET as a proxy.
1112 */
1113 do {
1114 /*
1115 * Wake all CPUs that expired before now.
1116 * Find the next CPU to wake up and next HPET program time.
1117 */
1118 now = gethrtime();
1119 next_proxy_time = HPET_INFINITY;
1120 next_proxy_id = CPU->cpu_id;
1121 for (id = 0; id < max_ncpus; ++id) {
1122 if (hpet_proxy_users[id] < now) {
1123 hpet_proxy_users[id] = HPET_INFINITY;
1124 if (id != CPU->cpu_id)
1125 poke_cpu(id);
1126 } else if (hpet_proxy_users[id] < next_proxy_time) {
1127 next_proxy_time = hpet_proxy_users[id];
1128 next_proxy_id = id;
1129 }
1130 }
1131
1132 if (next_proxy_time == HPET_INFINITY) {
1133 done = B_TRUE;
1134 /*
1135 * There are currently no CPUs using the HPET's Timer
1136 * as a proxy for their LAPIC Timer. The HPET's Timer
1137 * does not need to be programmed.
1138 *
1139 * Letting the HPET timer wrap around to the current
1140 * time is the longest possible timeout.
1141 * A 64-bit timer will wrap around in ~ 2^44 seconds.
1142 * A 32-bit timer will wrap around in ~ 2^12 seconds.
1143 *
1144 * Disabling the HPET's timer interrupt requires a
1145 * (relatively expensive) write to the HPET.
1146 * Instead we do nothing.
1147 *
1148 * We are gambling some CPU will attempt to enter a
1149 * deep c-state before the timer wraps around.
1150 * We assume one spurious interrupt in a little over an
1151 * hour has less performance impact than writing to the
1152 * HPET's timer disable bit every time all CPUs wakeup
1153 * from deep c-state.
1154 */
1155
1156 } else {
1157 /*
1158 * Idle CPUs disable interrupts before programming the
1159 * HPET to prevent a lost wakeup if the HPET
1160 * interrupts the idle cpu before it can enter a
1161 * Deep C-State.
1162 */
1163 if (hpet_timer_program(&hpet_info, proxy_timer,
1164 HRTIME_TO_HPET_TICKS(next_proxy_time - gethrtime()))
1165 != AE_OK) {
1166 /*
1167 * We could not program the HPET to wakeup the
1168 * next CPU. We must wake the CPU ourself to
1169 * avoid a lost wakeup.
1170 */
1171 hpet_proxy_users[next_proxy_id] = HPET_INFINITY;
1172 if (next_proxy_id != CPU->cpu_id)
1173 poke_cpu(next_proxy_id);
1174 } else {
1175 done = B_TRUE;
1176 }
1177 }
1178
1179 } while (!done);
1180
1181 return (next_proxy_time <= required_wakeup_time);
1182 }
1183
1184 /*
1185 * Use an HPET timer to act as this CPU's proxy local APIC timer.
1186 * Used in deep c-states C2 and above while the CPU's local APIC timer stalls.
1187 * Called by the idle thread with interrupts enabled.
1188 * Always returns with interrupts disabled.
1189 *
1190 * There are 3 possible outcomes from this function:
1191 * 1. The Local APIC Timer was already disabled before this function was called.
1192 * LAPIC TIMER : disabled
1193 * HPET : not scheduled to wake this CPU
1194 * *lapic_expire : (hrtime_t)HPET_INFINITY
1195 * Returns : B_TRUE
1196 * 2. Successfully programmed the HPET to act as a LAPIC Timer proxy.
1197 * LAPIC TIMER : disabled
1198 * HPET : scheduled to wake this CPU
1199 * *lapic_expire : hrtime_t when LAPIC timer would have expired
1200 * Returns : B_TRUE
1201 * 3. Failed to programmed the HPET to act as a LAPIC Timer proxy.
1202 * LAPIC TIMER : enabled
1203 * HPET : not scheduled to wake this CPU
1204 * *lapic_expire : (hrtime_t)HPET_INFINITY
1205 * Returns : B_FALSE
1206 *
1207 * The idle thread cannot enter Deep C-State in case 3.
1208 * The idle thread must re-enable & re-program the LAPIC_TIMER in case 2.
1209 */
1210 static boolean_t
hpet_use_hpet_timer(hrtime_t * lapic_expire)1211 hpet_use_hpet_timer(hrtime_t *lapic_expire)
1212 {
1213 extern hrtime_t apic_timer_stop_count(void);
1214 extern void apic_timer_restart(hrtime_t);
1215 hrtime_t now, expire, dead;
1216 uint64_t lapic_count, dead_count;
1217 cpupart_t *cpu_part;
1218 processorid_t cpu_sid;
1219 processorid_t cpu_id = CPU->cpu_id;
1220 processorid_t id;
1221 boolean_t rslt;
1222 boolean_t hset_update;
1223
1224 cpu_part = CPU->cpu_part;
1225 cpu_sid = CPU->cpu_seqid;
1226
1227 ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1228
1229 /*
1230 * A critical section exists between when the HPET is programmed
1231 * to interrupt the CPU and when this CPU enters an idle state.
1232 * Interrupts must be blocked during that time to prevent lost
1233 * CBE wakeup interrupts from either LAPIC or HPET.
1234 *
1235 * Must block interrupts before acquiring hpet_proxy_lock to prevent
1236 * a deadlock with the ISR if the ISR runs on this CPU after the
1237 * idle thread acquires the mutex but before it clears interrupts.
1238 */
1239 ASSERT(!interrupts_enabled());
1240 lapic_count = apic_timer_stop_count();
1241 now = gethrtime();
1242 dead = now + hpet_idle_spin_timeout;
1243 *lapic_expire = expire = now + lapic_count;
1244 if (lapic_count == (hrtime_t)-1) {
1245 /*
1246 * LAPIC timer is currently disabled.
1247 * Will not use the HPET as a LAPIC Timer proxy.
1248 */
1249 *lapic_expire = (hrtime_t)HPET_INFINITY;
1250 return (B_TRUE);
1251 }
1252
1253 /*
1254 * Serialize hpet_proxy data structure manipulation.
1255 */
1256 dead_count = 0;
1257 while (!mutex_tryenter(&hpet_proxy_lock)) {
1258 /*
1259 * spin
1260 */
1261 apic_timer_restart(expire);
1262 sti();
1263 cli();
1264
1265 if (dead_count++ > hpet_spin_check) {
1266 dead_count = 0;
1267 hset_update = (((CPU->cpu_flags & CPU_OFFLINE) == 0) &&
1268 (ncpus > 1));
1269 if (hset_update &&
1270 !bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
1271 *lapic_expire = (hrtime_t)HPET_INFINITY;
1272 return (B_FALSE);
1273 }
1274 }
1275
1276 lapic_count = apic_timer_stop_count();
1277 now = gethrtime();
1278 *lapic_expire = expire = now + lapic_count;
1279 if (lapic_count == (hrtime_t)-1) {
1280 /*
1281 * LAPIC timer is currently disabled.
1282 * Will not use the HPET as a LAPIC Timer proxy.
1283 */
1284 *lapic_expire = (hrtime_t)HPET_INFINITY;
1285 return (B_TRUE);
1286 }
1287 if (now > dead) {
1288 apic_timer_restart(expire);
1289 *lapic_expire = (hrtime_t)HPET_INFINITY;
1290 return (B_FALSE);
1291 }
1292 }
1293
1294 if ((hpet_state.cpr == B_TRUE) ||
1295 (hpet_state.cpu_deep_idle == B_FALSE) ||
1296 (hpet_state.proxy_installed == B_FALSE) ||
1297 (hpet_state.uni_cstate == B_TRUE)) {
1298 mutex_exit(&hpet_proxy_lock);
1299 apic_timer_restart(expire);
1300 *lapic_expire = (hrtime_t)HPET_INFINITY;
1301 return (B_FALSE);
1302 }
1303
1304 hpet_proxy_users[cpu_id] = expire;
1305
1306 /*
1307 * We are done if another cpu is scheduled on the HPET with an
1308 * expire time before us. The next HPET interrupt has been programmed
1309 * to fire before our expire time.
1310 */
1311 for (id = 0; id < max_ncpus; ++id) {
1312 if ((hpet_proxy_users[id] <= expire) && (id != cpu_id)) {
1313 mutex_exit(&hpet_proxy_lock);
1314 return (B_TRUE);
1315 }
1316 }
1317
1318 /*
1319 * We are the next lAPIC to expire.
1320 * Program the HPET with our expire time.
1321 */
1322 rslt = hpet_guaranteed_schedule(expire);
1323 mutex_exit(&hpet_proxy_lock);
1324
1325 if (rslt == B_FALSE) {
1326 apic_timer_restart(expire);
1327 *lapic_expire = (hrtime_t)HPET_INFINITY;
1328 }
1329
1330 return (rslt);
1331 }
1332
1333 /*
1334 * Called by the idle thread when waking up from Deep C-state before enabling
1335 * interrupts. With an array data structure it is faster to always remove
1336 * ourself from the array without checking if the HPET ISR already removed.
1337 *
1338 * We use a lazy algorithm for removing CPUs from the HPET's schedule.
1339 * We do not reprogram the HPET here because this CPU has real work to do.
1340 * On a idle system the CPU was probably woken up by the HPET's ISR.
1341 * On a heavily loaded system CPUs are not going into Deep C-state.
1342 * On a moderately loaded system another CPU will usually enter Deep C-state
1343 * and reprogram the HPET before the HPET fires with our wakeup.
1344 */
1345 static void
hpet_use_lapic_timer(hrtime_t expire)1346 hpet_use_lapic_timer(hrtime_t expire)
1347 {
1348 extern void apic_timer_restart(hrtime_t);
1349 processorid_t cpu_id = CPU->cpu_id;
1350
1351 ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1352 ASSERT(!interrupts_enabled());
1353
1354 hpet_proxy_users[cpu_id] = HPET_INFINITY;
1355
1356 /*
1357 * Do not enable a LAPIC Timer that was initially disabled.
1358 */
1359 if (expire != HPET_INFINITY)
1360 apic_timer_restart(expire);
1361 }
1362
1363 /*
1364 * Initialize data structure to keep track of CPUs using HPET as a proxy for
1365 * their stalled local APIC timer. For now this is just an array.
1366 */
1367 static void
hpet_init_proxy_data(void)1368 hpet_init_proxy_data(void)
1369 {
1370 processorid_t id;
1371
1372 /*
1373 * Use max_ncpus for hot plug compliance.
1374 */
1375 hpet_proxy_users = kmem_zalloc(max_ncpus * sizeof (*hpet_proxy_users),
1376 KM_SLEEP);
1377
1378 /*
1379 * Unused entries always contain HPET_INFINITY.
1380 */
1381 for (id = 0; id < max_ncpus; ++id)
1382 hpet_proxy_users[id] = HPET_INFINITY;
1383 }
1384