/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

#include <sys/atomic.h>
#include <sys/cpuvar.h>
#include <sys/cpu.h>
#include <sys/cpu_event.h>
#include <sys/cmn_err.h>
#include <sys/ddi.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/pci.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/synch.h>
#include <sys/sysmacros.h>
#include <sys/fipe.h>
#include <vm/hat.h>

/* Current PM policy, configurable through /etc/system and fipe.conf. */
fipe_pm_policy_t fipe_pm_policy = FIPE_PM_POLICY_BALANCE;
int fipe_pm_throttle_level = 1;

/* Enable kstat support. */
#define FIPE_KSTAT_SUPPORT      1

/* Enable performance-related statistics. */
#define FIPE_KSTAT_DETAIL       1

/* Enable the builtin IOAT driver if no IOAT driver is available. */
#define FIPE_IOAT_BUILTIN       0
#if defined(FIPE_IOAT_BUILTIN) && (FIPE_IOAT_BUILTIN == 0)
#undef FIPE_IOAT_BUILTIN
#endif
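
/*
 * The #if/#undef above turns the 0/1 setting into a plain defined/undefined
 * symbol, so the #ifdef FIPE_IOAT_BUILTIN tests below select between the
 * builtin register-level driver and the dcopy-based implementation.
 */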

#ifdef FIPE_IOAT_BUILTIN
/* Use IOAT channel 3 to generate memory transactions. */
#define FIPE_IOAT_CHAN_CTRL     0x200
#define FIPE_IOAT_CHAN_STS_LO   0x204
#define FIPE_IOAT_CHAN_STS_HI   0x208
#define FIPE_IOAT_CHAN_ADDR_LO  0x20C
#define FIPE_IOAT_CHAN_ADDR_HI  0x210
#define FIPE_IOAT_CHAN_CMD      0x214
#define FIPE_IOAT_CHAN_ERR      0x228
#else /* FIPE_IOAT_BUILTIN */
#include <sys/dcopy.h>
#endif /* FIPE_IOAT_BUILTIN */

/* Memory-controller-related PCI configuration constants. */
#define FIPE_MC_GBLACT          0x60
#define FIPE_MC_THRTLOW         0x64
#define FIPE_MC_THRTCTRL        0x67
#define FIPE_MC_THRTCTRL_HUNT   0x1

/* Hardware-recommended values. */
#define FIPE_MC_MEMORY_OFFSET   1024
#define FIPE_MC_MEMORY_SIZE     128

/* Number of IOAT commands posted when entering idle. */
#define FIPE_IOAT_CMD_NUM       2

/* Resource allocation retry interval in microseconds (15 s). */
#define FIPE_IOAT_RETRY_INTERVAL        (15 * 1000 * 1000)

/* Statistics update interval in nanoseconds (10 ms). */
#define FIPE_STAT_INTERVAL      (10 * 1000 * 1000)

/* Configuration profile support. */
#define FIPE_PROFILE_FIELD(field)       (fipe_profile_curr->field)
#define FIPE_PROF_IDLE_COUNT            FIPE_PROFILE_FIELD(idle_count)
#define FIPE_PROF_BUSY_THRESHOLD        FIPE_PROFILE_FIELD(busy_threshold)
#define FIPE_PROF_INTR_THRESHOLD        FIPE_PROFILE_FIELD(intr_threshold)
#define FIPE_PROF_INTR_BUSY_THRESHOLD   FIPE_PROFILE_FIELD(intr_busy_threshold)
#define FIPE_PROF_INTR_BUSY_THROTTLE    FIPE_PROFILE_FIELD(intr_busy_throttle)

/* Priority assigned to the FIPE memory power management driver on x86. */
#define CPU_IDLE_CB_PRIO_FIPE   (CPU_IDLE_CB_PRIO_LOW_BASE + 0x4000000)

/* Structure to support power management profiles. */
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_profiles)
static struct fipe_profile {
        uint32_t idle_count;
        uint32_t busy_threshold;
        uint32_t intr_threshold;
        uint32_t intr_busy_threshold;
        uint32_t intr_busy_throttle;
} fipe_profiles[FIPE_PM_POLICY_MAX] = {
        { 0, 0, 0, 0, 0 },
        { 5, 30, 20, 50, 5 },
        { 10, 40, 40, 75, 4 },
        { 15, 50, 60, 100, 2 },
};
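
/*
 * The rows above are indexed by fipe_pm_policy_t. The all-zero entry makes
 * every idle check in fipe_check_cpu() fail, which effectively disables PM;
 * successive entries tolerate progressively more load before vetoing the
 * memory power-saving state.
 */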

/* Structure to store memory-controller-related data. */
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_mc_ctrl)
static struct fipe_mc_ctrl {
        ddi_acc_handle_t mc_pci_hdl;
        unsigned char mc_thrtctrl;
        unsigned char mc_thrtlow;
        unsigned char mc_gblact;
        dev_info_t *mc_dip;
        boolean_t mc_initialized;
} fipe_mc_ctrl;

/* Structure to store IOAT-related information. */
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_ioat_ctrl)
static struct fipe_ioat_control {
        kmutex_t ioat_lock;
        boolean_t ioat_ready;
#ifdef FIPE_IOAT_BUILTIN
        boolean_t ioat_reg_mapped;
        ddi_acc_handle_t ioat_reg_handle;
        uint8_t *ioat_reg_addr;
        uint64_t ioat_cmd_physaddr;
#else /* FIPE_IOAT_BUILTIN */
        dcopy_cmd_t ioat_cmds[FIPE_IOAT_CMD_NUM + 1];
        dcopy_handle_t ioat_handle;
#endif /* FIPE_IOAT_BUILTIN */
        dev_info_t *ioat_dev_info;
        uint64_t ioat_buf_physaddr;
        char *ioat_buf_virtaddr;
        char *ioat_buf_start;
        size_t ioat_buf_size;
        timeout_id_t ioat_timerid;
        boolean_t ioat_failed;
        boolean_t ioat_cancel;
        boolean_t ioat_try_alloc;
} fipe_ioat_ctrl;

#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_idle_ctrl)
static struct fipe_idle_ctrl {
        boolean_t idle_ready;
        cpu_idle_callback_handle_t cb_handle;
        cpu_idle_prop_handle_t prop_enter;
        cpu_idle_prop_handle_t prop_exit;
        cpu_idle_prop_handle_t prop_busy;
        cpu_idle_prop_handle_t prop_idle;
        cpu_idle_prop_handle_t prop_intr;

        /* Placed here for cache efficiency; logically it belongs in fipe_global_ctrl. */
        hrtime_t tick_interval;
} fipe_idle_ctrl;

/*
 * Global control structure.
 * The Solaris idle thread has no reentrance issues, so it's sufficient to
 * count CPUs in the idle state. Otherwise a cpuset_t bitmap would be needed
 * to track idle CPUs.
 */
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_gbl_ctrl)
static struct fipe_global_ctrl {
        kmutex_t lock;
        boolean_t pm_enabled;
        volatile boolean_t pm_active;
        volatile uint32_t cpu_count;
        volatile uint64_t io_waiters;
        hrtime_t enter_ts;
        hrtime_t time_in_pm;
        size_t state_size;
        char *state_buf;
#ifdef FIPE_KSTAT_SUPPORT
        kstat_t *fipe_kstat;
#endif /* FIPE_KSTAT_SUPPORT */
} fipe_gbl_ctrl;

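/*
 * FIPE_CPU_STATE_PAD pads each fipe_cpu_state_t entry out to 128 bytes so
 * that per-CPU entries sit in separate cache lines, presumably to avoid
 * false sharing between CPUs updating their own idle statistics.
 */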
#define FIPE_CPU_STATE_PAD (128 - \
    2 * sizeof (boolean_t) - 4 * sizeof (hrtime_t) - \
    2 * sizeof (uint64_t) - 2 * sizeof (uint32_t))

/* Per-CPU status. */
#pragma pack(1)
typedef struct fipe_cpu_state {
        boolean_t cond_ready;
        boolean_t state_ready;
        uint32_t idle_count;
        uint32_t throttle_cnt;
        hrtime_t throttle_ts;
        hrtime_t next_ts;
        hrtime_t last_busy;
        hrtime_t last_idle;
        uint64_t last_intr;
        uint64_t last_iowait;
        char pad1[FIPE_CPU_STATE_PAD];
} fipe_cpu_state_t;
#pragma pack()

#ifdef FIPE_KSTAT_SUPPORT
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_kstat)
static struct fipe_kstat_s {
        kstat_named_t fipe_enabled;
        kstat_named_t fipe_policy;
        kstat_named_t fipe_pm_time;
#ifdef FIPE_KSTAT_DETAIL
        kstat_named_t ioat_ready;
        kstat_named_t pm_tryenter_cnt;
        kstat_named_t pm_success_cnt;
        kstat_named_t pm_race_cnt;
        kstat_named_t cpu_loop_cnt;
        kstat_named_t cpu_busy_cnt;
        kstat_named_t cpu_idle_cnt;
        kstat_named_t cpu_intr_busy_cnt;
        kstat_named_t cpu_intr_throttle_cnt;
        kstat_named_t bio_busy_cnt;
        kstat_named_t ioat_start_fail_cnt;
        kstat_named_t ioat_stop_fail_cnt;
#endif /* FIPE_KSTAT_DETAIL */
} fipe_kstat = {
        { "fipe_enabled", KSTAT_DATA_INT32 },
        { "fipe_policy", KSTAT_DATA_INT32 },
        { "fipe_pm_time", KSTAT_DATA_UINT64 },
#ifdef FIPE_KSTAT_DETAIL
        { "ioat_ready", KSTAT_DATA_INT32 },
        { "pm_tryenter_cnt", KSTAT_DATA_UINT64 },
        { "pm_success_cnt", KSTAT_DATA_UINT64 },
        { "pm_race_cnt", KSTAT_DATA_UINT64 },
        { "cpu_loop_cnt", KSTAT_DATA_UINT64 },
        { "cpu_busy_cnt", KSTAT_DATA_UINT64 },
        { "cpu_idle_cnt", KSTAT_DATA_UINT64 },
        { "cpu_intr_busy_cnt", KSTAT_DATA_UINT64 },
        { "cpu_intr_thrt_cnt", KSTAT_DATA_UINT64 },
        { "bio_busy_cnt", KSTAT_DATA_UINT64 },
        { "ioat_start_fail_cnt", KSTAT_DATA_UINT64 },
        { "ioat_stop_fail_cnt", KSTAT_DATA_UINT64 }
#endif /* FIPE_KSTAT_DETAIL */
};
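
/*
 * C initializes the named structure above positionally, so the order of the
 * initializers must match the field order of struct fipe_kstat_s exactly;
 * fipe_init() creates the kstat with KSTAT_FLAG_VIRTUAL and points ks_data
 * directly at this structure.
 */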

#define FIPE_KSTAT_INC(v) \
        atomic_inc_64(&fipe_kstat.v.value.ui64)
#ifdef FIPE_KSTAT_DETAIL
#define FIPE_KSTAT_DETAIL_INC(v) \
        atomic_inc_64(&fipe_kstat.v.value.ui64)
#else /* FIPE_KSTAT_DETAIL */
#define FIPE_KSTAT_DETAIL_INC(v)
#endif /* FIPE_KSTAT_DETAIL */

#else /* FIPE_KSTAT_SUPPORT */

#define FIPE_KSTAT_INC(v)
#define FIPE_KSTAT_DETAIL_INC(v)

#endif /* FIPE_KSTAT_SUPPORT */

/* Save the current power management policy across suspend/resume. */
static fipe_pm_policy_t fipe_pm_policy_saved = FIPE_PM_POLICY_BALANCE;
static fipe_cpu_state_t *fipe_cpu_states = NULL;

/*
 * There is no lock to protect fipe_profile_curr, so it may change under
 * threads in fipe_idle_enter(). This is not an issue: it always points to
 * a valid profile, and although a thread might make a decision based on a
 * stale profile, that decision is still valid for some profile, and the
 * correct operation for the new profile happens on the next cpu_idle_enter
 * cycle. Since the selections are always valid for some profile, the
 * overhead of a lock is not warranted.
 */
static struct fipe_profile *fipe_profile_curr = NULL;

static void fipe_idle_enter(void *arg, cpu_idle_callback_context_t ctx,
    cpu_idle_check_wakeup_t check_func, void *check_arg);
static void fipe_idle_exit(void *arg, cpu_idle_callback_context_t ctx,
    int flags);
static cpu_idle_callback_t fipe_idle_cb = {
        CPU_IDLE_CALLBACK_VER0,
        fipe_idle_enter,
        fipe_idle_exit,
};
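
/*
 * The enter/exit callbacks registered via fipe_idle_cb run on each CPU's
 * idle thread with interrupts disabled (see the note in fipe_check_cpu()),
 * so both keep the common path short and use only try-locks.
 */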

/*
 * Configure the memory controller into power-saving mode:
 * 1) OLTT activation limit is set to unlimited
 * 2) MC works in S-CLTT mode
 */
static int
fipe_mc_change(int throttle)
{
        /* Enable OLTT/disable S-CLTT mode */
        pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL,
            fipe_mc_ctrl.mc_thrtctrl & ~FIPE_MC_THRTCTRL_HUNT);
        /* Set OLTT activation limit to unlimited */
        pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_GBLACT, 0);
        /*
         * Set S-CLTT low throttling to the desired value. The lower the
         * value, the more power saved and the less memory bandwidth
         * available.
         */
        pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTLOW, throttle);
        /* Enable S-CLTT/disable OLTT mode */
        pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL,
            fipe_mc_ctrl.mc_thrtctrl | FIPE_MC_THRTCTRL_HUNT);

        return (0);
}

/*
 * Restore the memory controller's original configuration.
 */
static void
fipe_mc_restore(void)
{
        pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL,
            fipe_mc_ctrl.mc_thrtctrl & ~FIPE_MC_THRTCTRL_HUNT);
        pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_GBLACT,
            fipe_mc_ctrl.mc_gblact);
        pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTLOW,
            fipe_mc_ctrl.mc_thrtlow);
        pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL,
            fipe_mc_ctrl.mc_thrtctrl);
}

/*
 * Initialize the memory controller's data structure and status.
 */
static int
fipe_mc_init(dev_info_t *dip)
{
        ddi_acc_handle_t handle;

        bzero(&fipe_mc_ctrl, sizeof (fipe_mc_ctrl));

        /* Hold a reference; it will be released in fipe_mc_fini(). */
        ndi_hold_devi(dip);

        /* Set up the PCI configuration access handle. */
        if (pci_config_setup(dip, &handle) != DDI_SUCCESS) {
                cmn_err(CE_WARN,
                    "!fipe: failed to setup pcicfg handler in mc_init.");
                ndi_rele_devi(dip);
                return (-1);
        }

        /* Save the original configuration. */
        fipe_mc_ctrl.mc_thrtctrl = pci_config_get8(handle, FIPE_MC_THRTCTRL);
        fipe_mc_ctrl.mc_thrtlow = pci_config_get8(handle, FIPE_MC_THRTLOW);
        fipe_mc_ctrl.mc_gblact = pci_config_get8(handle, FIPE_MC_GBLACT);
        fipe_mc_ctrl.mc_dip = dip;
        fipe_mc_ctrl.mc_pci_hdl = handle;
        fipe_mc_ctrl.mc_initialized = B_TRUE;

        return (0);
}

/*
 * Restore the memory controller's configuration and release resources.
 */
static void
fipe_mc_fini(void)
{
        if (fipe_mc_ctrl.mc_initialized) {
                fipe_mc_restore();
                pci_config_teardown(&fipe_mc_ctrl.mc_pci_hdl);
                ndi_rele_devi(fipe_mc_ctrl.mc_dip);
                fipe_mc_ctrl.mc_initialized = B_FALSE;
        }
        bzero(&fipe_mc_ctrl, sizeof (fipe_mc_ctrl));
}

/* Search for devices with specific PCI IDs. */
struct fipe_pci_ioat_id {
        uint16_t venid;
        uint16_t devid;
        uint16_t subvenid;
        uint16_t subsysid;
        char *unitaddr;
};

static struct fipe_pci_ioat_id fipe_pci_ioat_ids[] = {
        { 0x8086, 0x1a38, 0xffff, 0xffff, NULL },
        { 0x8086, 0x360b, 0xffff, 0xffff, NULL },
};
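
/*
 * Vendor ID 0x8086 is Intel. A field value of 0xffff acts as a wildcard in
 * the matching loop of fipe_search_ioat_dev() below, as does a NULL
 * unit-address.
 */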

/*ARGSUSED*/
static int
fipe_search_ioat_dev(dev_info_t *dip, void *arg)
{
        char *unit;
        struct fipe_pci_ioat_id *id;
        int i, max, venid, devid, subvenid, subsysid;

        /* Query PCI ID properties. */
        venid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
            "vendor-id", 0xffffffff);
        if (venid == 0xffffffff) {
                return (DDI_WALK_CONTINUE);
        }
        devid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
            "device-id", 0xffffffff);
        if (devid == 0xffffffff) {
                return (DDI_WALK_CONTINUE);
        }
        subvenid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
            "subsystem-vendor-id", 0xffffffff);
        if (subvenid == 0xffffffff) {
                return (DDI_WALK_CONTINUE);
        }
        subsysid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
            "subsystem-id", 0xffffffff);
        if (subsysid == 0xffffffff) {
                return (DDI_WALK_CONTINUE);
        }
        if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
            "unit-address", &unit) != DDI_PROP_SUCCESS) {
                return (DDI_WALK_CONTINUE);
        }

        max = sizeof (fipe_pci_ioat_ids) / sizeof (fipe_pci_ioat_ids[0]);
        for (i = 0; i < max; i++) {
                id = &fipe_pci_ioat_ids[i];
                if ((id->venid == 0xffffu || id->venid == venid) &&
                    (id->devid == 0xffffu || id->devid == devid) &&
                    (id->subvenid == 0xffffu || id->subvenid == subvenid) &&
                    (id->subsysid == 0xffffu || id->subsysid == subsysid) &&
                    (id->unitaddr == NULL || strcmp(id->unitaddr, unit) == 0)) {
                        break;
                }
        }
        ddi_prop_free(unit);
        if (i >= max) {
                return (DDI_WALK_CONTINUE);
        }

        /* Found an IOAT device; hold one reference count. */
        ndi_hold_devi(dip);
        fipe_ioat_ctrl.ioat_dev_info = dip;

        return (DDI_WALK_TERMINATE);
}

/*
 * To enable the FBDIMM idle power enhancement mechanism, IOAT is used to
 * generate enough memory traffic to trigger the memory controller's thermal
 * throttling circuitry.
 * If dcopy/ioat is available, the dcopy interface is used to communicate
 * with IOAT. Otherwise the builtin driver talks directly to the IOAT
 * hardware.
 */
#ifdef FIPE_IOAT_BUILTIN
static int
fipe_ioat_trigger(void)
{
        uint16_t ctrl;
        uint32_t err;
        uint8_t *addr = fipe_ioat_ctrl.ioat_reg_addr;
        ddi_acc_handle_t handle = fipe_ioat_ctrl.ioat_reg_handle;

        /* Check the channel-in-use flag. */
        ctrl = ddi_get16(handle, (uint16_t *)(addr + FIPE_IOAT_CHAN_CTRL));
        if (ctrl & 0x100) {
                /*
                 * The channel is in use by somebody else. The IOAT driver
                 * may have been loaded; forbid fipe from accessing the IOAT
                 * hardware from now on.
                 */
                fipe_ioat_ctrl.ioat_ready = B_FALSE;
                fipe_ioat_ctrl.ioat_failed = B_TRUE;
                FIPE_KSTAT_INC(ioat_start_fail_cnt);
                return (-1);
        } else {
                /* Set the channel-in-use flag. */
                ddi_put16(handle,
                    (uint16_t *)(addr + FIPE_IOAT_CHAN_CTRL), 0x100);
        }

        /* Write the command address. */
        ddi_put32(handle,
            (uint32_t *)(addr + FIPE_IOAT_CHAN_ADDR_LO),
            (uint32_t)fipe_ioat_ctrl.ioat_cmd_physaddr);
        ddi_put32(handle, (uint32_t *)(addr + FIPE_IOAT_CHAN_ADDR_HI),
            (uint32_t)(fipe_ioat_ctrl.ioat_cmd_physaddr >> 32));

        /* Check and clear error flags. */
        err = ddi_get32(handle, (uint32_t *)(addr + FIPE_IOAT_CHAN_ERR));
        if (err != 0) {
                ddi_put32(handle, (uint32_t *)(addr + FIPE_IOAT_CHAN_ERR), err);
        }

        /* Start the channel. */
        ddi_put8(handle, (uint8_t *)(addr + FIPE_IOAT_CHAN_CMD), 0x1);

        return (0);
}

static void
fipe_ioat_cancel(void)
{
        uint32_t status;
        uint8_t *addr = fipe_ioat_ctrl.ioat_reg_addr;
        ddi_acc_handle_t handle = fipe_ioat_ctrl.ioat_reg_handle;

        /*
         * Reset the channel. Sometimes reset is not reliable,
         * so check completion or abort status after reset.
         */
        /* LINTED: constant in conditional context */
        while (1) {
                /* Issue the reset channel command. */
                ddi_put8(handle, (uint8_t *)(addr + FIPE_IOAT_CHAN_CMD), 0x20);

                /* Query command status. */
                status = ddi_get32(handle,
                    (uint32_t *)(addr + FIPE_IOAT_CHAN_STS_LO));
                if (status & 0x1) {
                        /* Channel reset completed. */
                        break;
                } else {
                        SMT_PAUSE();
                }
        }

        /* Put the channel into the "not in use" state. */
        ddi_put16(handle, (uint16_t *)(addr + FIPE_IOAT_CHAN_CTRL), 0);
}

/*ARGSUSED*/
static void
fipe_ioat_alloc(void *arg)
{
        int rc = 0, nregs;
        dev_info_t *dip;
        ddi_device_acc_attr_t attr;
        boolean_t fatal = B_FALSE;

        mutex_enter(&fipe_ioat_ctrl.ioat_lock);
        /*
         * fipe_ioat_alloc() is called in DEVICE ATTACH context when loaded.
         * In DEVICE ATTACH context it can't call ddi_walk_devs(), so just
         * schedule a timer and exit.
         */
        if (fipe_ioat_ctrl.ioat_try_alloc == B_FALSE) {
                fipe_ioat_ctrl.ioat_try_alloc = B_TRUE;
                goto out_error;
        }

        /* Check whether it has been initialized or hit a permanent error. */
        if (fipe_ioat_ctrl.ioat_ready || fipe_ioat_ctrl.ioat_failed ||
            fipe_ioat_ctrl.ioat_cancel) {
                fipe_ioat_ctrl.ioat_timerid = 0;
                mutex_exit(&fipe_ioat_ctrl.ioat_lock);
                return;
        }

        if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
                /* Find the dev_info_t for the IOAT engine. */
                ddi_walk_devs(ddi_root_node(), fipe_search_ioat_dev, NULL);
                if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
                        cmn_err(CE_NOTE,
                            "!fipe: no IOAT hardware found, disable pm.");
                        fatal = B_TRUE;
                        goto out_error;
                }
        }

        /* Map in the IOAT control register window. */
        ASSERT(fipe_ioat_ctrl.ioat_dev_info != NULL);
        ASSERT(fipe_ioat_ctrl.ioat_reg_mapped == B_FALSE);
        dip = fipe_ioat_ctrl.ioat_dev_info;
        if (ddi_dev_nregs(dip, &nregs) != DDI_SUCCESS || nregs < 2) {
                cmn_err(CE_WARN,
                    "!fipe: IOAT does not have enough register bars.");
                fatal = B_TRUE;
                goto out_error;
        }
        attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
        attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
        attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
        rc = ddi_regs_map_setup(dip, 1,
            (caddr_t *)&fipe_ioat_ctrl.ioat_reg_addr,
            0, 0, &attr, &fipe_ioat_ctrl.ioat_reg_handle);
        if (rc != DDI_SUCCESS) {
                cmn_err(CE_WARN, "!fipe: failed to map IOAT registers.");
                fatal = B_TRUE;
                goto out_error;
        }

        /* Mark IOAT status. */
        fipe_ioat_ctrl.ioat_reg_mapped = B_TRUE;
        fipe_ioat_ctrl.ioat_ready = B_TRUE;
        fipe_ioat_ctrl.ioat_failed = B_FALSE;
        fipe_ioat_ctrl.ioat_timerid = 0;
        mutex_exit(&fipe_ioat_ctrl.ioat_lock);

        return;

out_error:
        fipe_ioat_ctrl.ioat_timerid = 0;
        if (!fipe_ioat_ctrl.ioat_ready && !fipe_ioat_ctrl.ioat_cancel) {
                if (fatal) {
                        /* Mark a permanent error and give up. */
                        fipe_ioat_ctrl.ioat_failed = B_TRUE;
                        /* Release the reference held in fipe_search_ioat_dev. */
                        if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
                                ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
                                fipe_ioat_ctrl.ioat_dev_info = NULL;
                        }
                } else {
                        /*
                         * Schedule another timer to keep on trying.
                         * timeout() should always succeed; no need to check
                         * the return value.
                         */
                        fipe_ioat_ctrl.ioat_timerid = timeout(fipe_ioat_alloc,
                            NULL, drv_usectohz(FIPE_IOAT_RETRY_INTERVAL));
                }
        }
        mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}

static void
fipe_ioat_free(void)
{
        mutex_enter(&fipe_ioat_ctrl.ioat_lock);
        /* Cancel the timeout to avoid a race condition. */
        if (fipe_ioat_ctrl.ioat_timerid != 0) {
                fipe_ioat_ctrl.ioat_cancel = B_TRUE;
                mutex_exit(&fipe_ioat_ctrl.ioat_lock);
                (void) untimeout(fipe_ioat_ctrl.ioat_timerid);
                mutex_enter(&fipe_ioat_ctrl.ioat_lock);
                fipe_ioat_ctrl.ioat_timerid = 0;
                fipe_ioat_ctrl.ioat_cancel = B_FALSE;
        }

        if (fipe_ioat_ctrl.ioat_reg_mapped) {
                ddi_regs_map_free(&fipe_ioat_ctrl.ioat_reg_handle);
                fipe_ioat_ctrl.ioat_reg_mapped = B_FALSE;
        }

        fipe_ioat_ctrl.ioat_ready = B_FALSE;
        mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}

#else /* FIPE_IOAT_BUILTIN */

/*
 * Trigger IOAT memory copy operations when entering the power-saving state.
 * A group of commands is posted to the IOAT driver and placed into an IOAT
 * ring buffer.
 */
static int
fipe_ioat_trigger(void)
{
        int idx;
        dcopy_cmd_t *cmds = fipe_ioat_ctrl.ioat_cmds;

        for (idx = FIPE_IOAT_CMD_NUM; idx > 0; idx--) {
                if (dcopy_cmd_post(cmds[idx]) == DCOPY_SUCCESS) {
                        continue;
                } else {
                        /*
                         * Don't roll back on failure; it costs little more
                         * than a few small memory copy operations.
                         */
                        FIPE_KSTAT_DETAIL_INC(ioat_start_fail_cnt);
                        return (-1);
                }
        }

        return (0);
}

/*
 * Cancel the memory copy operations posted by fipe_ioat_trigger().
 * This is achieved by posting a new command which breaks the ring
 * created by fipe_ioat_trigger(). If it fails, the best way to recover
 * is to just let it go; IOAT will recover when the next command is
 * posted on the same channel.
 */
static void
fipe_ioat_cancel(void)
{
        if (dcopy_cmd_post(fipe_ioat_ctrl.ioat_cmds[0]) != DCOPY_SUCCESS) {
                FIPE_KSTAT_DETAIL_INC(ioat_stop_fail_cnt);
        }
}

/*
 * This function is called to allocate IOAT resources.
 * Allocation may fail for the following reasons:
 * 1) The IOAT driver hasn't been loaded yet. Keep on trying in this case.
 * 2) IOAT resources are temporarily unavailable. Keep on trying in this case.
 * 3) Other non-recoverable reasons. Disable the power management function.
 */
/*ARGSUSED*/
static void
fipe_ioat_alloc(void *arg)
{
        int idx, flags, rc = 0;
        uint64_t physaddr;
        boolean_t fatal = B_FALSE;
        dcopy_query_t info;
        dcopy_handle_t handle;
        dcopy_cmd_t cmds[FIPE_IOAT_CMD_NUM + 1];

        mutex_enter(&fipe_ioat_ctrl.ioat_lock);
        /*
         * fipe_ioat_alloc() is called in DEVICE ATTACH context when loaded.
         * In DEVICE ATTACH context it can't call ddi_walk_devs(), so just
         * schedule a timer and exit.
         */
        if (fipe_ioat_ctrl.ioat_try_alloc == B_FALSE) {
                fipe_ioat_ctrl.ioat_try_alloc = B_TRUE;
                mutex_exit(&fipe_ioat_ctrl.ioat_lock);
                goto out_error;
        }

        /*
         * Check whether the device has been initialized or whether it
         * encountered a permanent error.
         */
        if (fipe_ioat_ctrl.ioat_ready || fipe_ioat_ctrl.ioat_failed ||
            fipe_ioat_ctrl.ioat_cancel) {
                fipe_ioat_ctrl.ioat_timerid = 0;
                mutex_exit(&fipe_ioat_ctrl.ioat_lock);
                return;
        }

        if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
                /* Find the dev_info_t for the IOAT engine. */
                ddi_walk_devs(ddi_root_node(), fipe_search_ioat_dev, NULL);
                if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
                        cmn_err(CE_NOTE,
                            "!fipe: no IOAT hardware found, disable pm.");
                        mutex_exit(&fipe_ioat_ctrl.ioat_lock);
                        fatal = B_TRUE;
                        goto out_error;
                }
        }
        mutex_exit(&fipe_ioat_ctrl.ioat_lock);

        /* Check, allocate and initialize IOAT resources with the lock released. */
        dcopy_query(&info);
        if (info.dq_version < DCOPY_QUERY_V0) {
                /* Permanent error, give up. */
                cmn_err(CE_WARN, "!fipe: IOAT driver version mismatch.");
                fatal = B_TRUE;
                goto out_error;
        } else if (info.dq_num_channels == 0) {
                /* The IOAT driver hasn't been loaded; keep trying. */
                goto out_error;
        }

        /* Allocate an IOAT channel. */
        rc = dcopy_alloc(DCOPY_NOSLEEP, &handle);
        if (rc == DCOPY_NORESOURCES) {
                /* Resources temporarily unavailable; keep trying. */
                goto out_error;
        } else if (rc != DCOPY_SUCCESS) {
                /* Permanent error, give up. */
                cmn_err(CE_WARN, "!fipe: failed to allocate IOAT channel.");
                fatal = B_TRUE;
                goto out_error;
        }

        /*
         * Allocate multiple IOAT commands and organize them into a ring that
         * loops forever. The number of commands is determined by the IOAT
         * descriptor size and the memory interleave pattern.
         * cmd[0] is used to break the loop and disable IOAT operation.
         * cmd[1 .. FIPE_IOAT_CMD_NUM] are grouped into a ring with cmd[1] as
         * the list head.
         */
        bzero(cmds, sizeof (cmds));
        physaddr = fipe_ioat_ctrl.ioat_buf_physaddr;
        for (idx = FIPE_IOAT_CMD_NUM; idx >= 0; idx--) {
                /* Allocate an IOAT command. */
                if (idx == 0 || idx == FIPE_IOAT_CMD_NUM) {
                        flags = DCOPY_NOSLEEP;
                } else {
                        /*
                         * To link commands into a list, the initial value of
                         * cmd needs to be set to the next cmd on the list.
                         */
                        flags = DCOPY_NOSLEEP | DCOPY_ALLOC_LINK;
                        cmds[idx] = cmds[idx + 1];
                }
                rc = dcopy_cmd_alloc(handle, flags, &cmds[idx]);
                if (rc == DCOPY_NORESOURCES) {
                        goto out_freecmd;
                } else if (rc != DCOPY_SUCCESS) {
                        /* Permanent error, give up. */
                        cmn_err(CE_WARN,
                            "!fipe: failed to allocate IOAT command.");
                        fatal = B_TRUE;
                        goto out_freecmd;
                }

                /* Disable src/dst snoop to improve CPU cache efficiency. */
                cmds[idx]->dp_flags = DCOPY_CMD_NOSRCSNP | DCOPY_CMD_NODSTSNP;
                /* Specially handle commands on the list. */
                if (idx != 0) {
                        /* Disable IOAT status updates. */
                        cmds[idx]->dp_flags |= DCOPY_CMD_NOSTAT;
                        /* Disable waiting for resources. */
                        cmds[idx]->dp_flags |= DCOPY_CMD_NOWAIT;
                        if (idx == 1) {
                                /* The list head; chain commands into a loop. */
                                cmds[idx]->dp_flags |= DCOPY_CMD_LOOP;
                        } else {
                                /* Queue all other commands except the head. */
                                cmds[idx]->dp_flags |= DCOPY_CMD_QUEUE;
                        }
                }
                cmds[idx]->dp_cmd = DCOPY_CMD_COPY;
                cmds[idx]->dp.copy.cc_source = physaddr;
                cmds[idx]->dp.copy.cc_dest = physaddr + FIPE_MC_MEMORY_OFFSET;
                if (idx == 0) {
                        /*
                         * Command 0 is used to cancel the memory copy by
                         * breaking the ring created in fipe_ioat_trigger().
                         * For efficiency, use the smallest copy size.
                         */
                        cmds[idx]->dp.copy.cc_size = 1;
                } else {
                        cmds[idx]->dp.copy.cc_size = FIPE_MC_MEMORY_SIZE;
                }
        }

        /* Update IOAT control status if it hasn't been initialized yet. */
        mutex_enter(&fipe_ioat_ctrl.ioat_lock);
        if (!fipe_ioat_ctrl.ioat_ready && !fipe_ioat_ctrl.ioat_cancel) {
                fipe_ioat_ctrl.ioat_handle = handle;
                for (idx = 0; idx <= FIPE_IOAT_CMD_NUM; idx++) {
                        fipe_ioat_ctrl.ioat_cmds[idx] = cmds[idx];
                }
                fipe_ioat_ctrl.ioat_ready = B_TRUE;
                fipe_ioat_ctrl.ioat_failed = B_FALSE;
                fipe_ioat_ctrl.ioat_timerid = 0;
                mutex_exit(&fipe_ioat_ctrl.ioat_lock);
                return;
        }
        mutex_exit(&fipe_ioat_ctrl.ioat_lock);
        /* Initialized by another thread; fall through to free resources. */

out_freecmd:
        if (cmds[0] != NULL) {
                dcopy_cmd_free(&cmds[0]);
        }
        /* Only the head needs freeing; dcopy frees all commands on the list. */
        for (idx = 1; idx <= FIPE_IOAT_CMD_NUM; idx++) {
                if (cmds[idx] != NULL) {
                        dcopy_cmd_free(&cmds[idx]);
                        break;
                }
        }
        dcopy_free(&handle);

out_error:
        mutex_enter(&fipe_ioat_ctrl.ioat_lock);
        fipe_ioat_ctrl.ioat_timerid = 0;
        if (!fipe_ioat_ctrl.ioat_ready && !fipe_ioat_ctrl.ioat_cancel) {
                if (fatal) {
                        /* Mark a permanent error and give up. */
                        fipe_ioat_ctrl.ioat_failed = B_TRUE;
                        /* Release the reference held in fipe_search_ioat_dev. */
                        if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
                                ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
                                fipe_ioat_ctrl.ioat_dev_info = NULL;
                        }
                } else {
                        /*
                         * Schedule another timer to keep on trying.
                         * timeout() should always succeed; no need to check
                         * the return value.
                         */
                        fipe_ioat_ctrl.ioat_timerid = timeout(fipe_ioat_alloc,
                            NULL, drv_usectohz(FIPE_IOAT_RETRY_INTERVAL));
                }
        }
        mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}

/*
 * Free resources allocated in fipe_ioat_alloc().
 */
static void
fipe_ioat_free(void)
{
        int idx = 0;
        dcopy_cmd_t *cmds = fipe_ioat_ctrl.ioat_cmds;

        mutex_enter(&fipe_ioat_ctrl.ioat_lock);

        /* Cancel the timeout to avoid a race condition. */
        if (fipe_ioat_ctrl.ioat_timerid != 0) {
                fipe_ioat_ctrl.ioat_cancel = B_TRUE;
                mutex_exit(&fipe_ioat_ctrl.ioat_lock);
                (void) untimeout(fipe_ioat_ctrl.ioat_timerid);
                mutex_enter(&fipe_ioat_ctrl.ioat_lock);
                fipe_ioat_ctrl.ioat_timerid = 0;
                fipe_ioat_ctrl.ioat_cancel = B_FALSE;
        }

        /* Free IOAT resources. */
        if (fipe_ioat_ctrl.ioat_ready) {
                if (cmds[0] != NULL) {
                        dcopy_cmd_free(&cmds[0]);
                }
                for (idx = 1; idx <= FIPE_IOAT_CMD_NUM; idx++) {
                        if (cmds[idx] != NULL) {
                                dcopy_cmd_free(&cmds[idx]);
                                break;
                        }
                }
                bzero(fipe_ioat_ctrl.ioat_cmds,
                    sizeof (fipe_ioat_ctrl.ioat_cmds));
                dcopy_free(&fipe_ioat_ctrl.ioat_handle);
                fipe_ioat_ctrl.ioat_handle = NULL;
                fipe_ioat_ctrl.ioat_ready = B_FALSE;
        }

        /* Release the reference held in fipe_search_ioat_dev. */
        if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
                ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
                fipe_ioat_ctrl.ioat_dev_info = NULL;
        }

        mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}
#endif /* FIPE_IOAT_BUILTIN */

/*
 * Initialize IOAT-related resources.
 */
static int
fipe_ioat_init(void)
{
        char *buf;
        size_t size;

        bzero(&fipe_ioat_ctrl, sizeof (fipe_ioat_ctrl));
        mutex_init(&fipe_ioat_ctrl.ioat_lock, NULL, MUTEX_DRIVER, NULL);

        /*
         * Allocate memory for the IOAT memory copy operations.
         * The allocated memory should be page aligned to achieve better
         * power savings.
         * Don't use ddi_dma_mem_alloc here to keep things simple. This also
         * makes quiesce easier.
         */
        size = PAGESIZE;
        buf = kmem_zalloc(size, KM_SLEEP);
        if ((intptr_t)buf & PAGEOFFSET) {
                kmem_free(buf, PAGESIZE);
                size <<= 1;
                buf = kmem_zalloc(size, KM_SLEEP);
        }
        fipe_ioat_ctrl.ioat_buf_size = size;
        fipe_ioat_ctrl.ioat_buf_start = buf;
        buf = (char *)P2ROUNDUP((intptr_t)buf, PAGESIZE);
        fipe_ioat_ctrl.ioat_buf_virtaddr = buf;
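        /*
         * Convert the page frame number returned by hat_getpfnum() into a
         * physical address.
         */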
        fipe_ioat_ctrl.ioat_buf_physaddr = hat_getpfnum(kas.a_hat, buf);
        fipe_ioat_ctrl.ioat_buf_physaddr <<= PAGESHIFT;

#ifdef FIPE_IOAT_BUILTIN
        {
                uint64_t bufpa;
                /* IOAT descriptor data structure copied from ioat.h. */
                struct fipe_ioat_cmd_desc {
                        uint32_t dd_size;
                        uint32_t dd_ctrl;
                        uint64_t dd_src_paddr;
                        uint64_t dd_dest_paddr;
                        uint64_t dd_next_desc;
                        uint64_t dd_res4;
                        uint64_t dd_res5;
                        uint64_t dd_res6;
                        uint64_t dd_res7;
                } *desc;

                /*
                 * Build two IOAT command descriptors and chain them into a
                 * ring. Control flags are as below:
                 * 0x2: disable source snoop
                 * 0x4: disable destination snoop
                 * 0x0 << 24: memory copy operation
                 * The layout of the command descriptors and memory buffers
                 * is organized for its power-saving effect; please don't
                 * change it.
                 */
                buf = fipe_ioat_ctrl.ioat_buf_virtaddr;
                bufpa = fipe_ioat_ctrl.ioat_buf_physaddr;
                fipe_ioat_ctrl.ioat_cmd_physaddr = bufpa;

                /* First command descriptor. */
                desc = (struct fipe_ioat_cmd_desc *)(buf);
                desc->dd_size = 128;
                desc->dd_ctrl = 0x6;
                desc->dd_src_paddr = bufpa + 2048;
                desc->dd_dest_paddr = bufpa + 3072;
                /* Point to the second descriptor. */
                desc->dd_next_desc = bufpa + 64;

                /* Second command descriptor. */
                desc = (struct fipe_ioat_cmd_desc *)(buf + 64);
                desc->dd_size = 128;
                desc->dd_ctrl = 0x6;
                desc->dd_src_paddr = bufpa + 2048;
                desc->dd_dest_paddr = bufpa + 3072;
                /* Point back to the first descriptor. */
                desc->dd_next_desc = bufpa;
        }
#endif /* FIPE_IOAT_BUILTIN */

        return (0);
}

static void
fipe_ioat_fini(void)
{
        /* Release the reference held in fipe_search_ioat_dev. */
        if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
                ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
                fipe_ioat_ctrl.ioat_dev_info = NULL;
        }

        if (fipe_ioat_ctrl.ioat_buf_start != NULL) {
                ASSERT(fipe_ioat_ctrl.ioat_buf_size != 0);
                kmem_free(fipe_ioat_ctrl.ioat_buf_start,
                    fipe_ioat_ctrl.ioat_buf_size);
        }

        mutex_destroy(&fipe_ioat_ctrl.ioat_lock);
        bzero(&fipe_ioat_ctrl, sizeof (fipe_ioat_ctrl));
}

static int
fipe_idle_start(void)
{
        int rc;

        if (fipe_idle_ctrl.idle_ready) {
                return (0);
        }

        if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_ENTER_TIMESTAMP,
            &fipe_idle_ctrl.prop_enter) != 0) {
                cmn_err(CE_WARN, "!fipe: failed to get enter_ts property.");
                return (-1);
        }
        if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_EXIT_TIMESTAMP,
            &fipe_idle_ctrl.prop_exit) != 0) {
                cmn_err(CE_WARN, "!fipe: failed to get exit_ts property.");
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
                return (-1);
        }
        if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_TOTAL_IDLE_TIME,
            &fipe_idle_ctrl.prop_idle) != 0) {
                cmn_err(CE_WARN, "!fipe: failed to get idle_time property.");
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
                return (-1);
        }
        if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_TOTAL_BUSY_TIME,
            &fipe_idle_ctrl.prop_busy) != 0) {
                cmn_err(CE_WARN, "!fipe: failed to get busy_time property.");
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle);
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
                return (-1);
        }
        if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_INTERRUPT_COUNT,
            &fipe_idle_ctrl.prop_intr) != 0) {
                cmn_err(CE_WARN, "!fipe: failed to get intr_count property.");
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_busy);
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle);
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
                return (-1);
        }

        /* Register the idle state notification callback. */
        rc = cpu_idle_register_callback(CPU_IDLE_CB_PRIO_FIPE, &fipe_idle_cb,
            NULL, &fipe_idle_ctrl.cb_handle);
        if (rc != 0) {
                cmn_err(CE_WARN, "!fipe: failed to register cpuidle callback.");
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_intr);
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_busy);
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle);
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
                (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
                return (-1);
        }

        fipe_idle_ctrl.idle_ready = B_TRUE;

        return (0);
}

static int
fipe_idle_stop(void)
{
        int rc;

        if (fipe_idle_ctrl.idle_ready == B_FALSE) {
                return (0);
        }

        rc = cpu_idle_unregister_callback(fipe_idle_ctrl.cb_handle);
        if (rc != 0) {
                cmn_err(CE_WARN,
                    "!fipe: failed to unregister cpuidle callback.");
                return (-1);
        }

        (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_intr);
        (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_busy);
        (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle);
        (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
        (void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);

        fipe_idle_ctrl.idle_ready = B_FALSE;

        return (0);
}

#ifdef FIPE_KSTAT_SUPPORT
static int
fipe_kstat_update(kstat_t *ksp, int rw)
{
        struct fipe_kstat_s *sp;
        hrtime_t hrt;

        if (rw == KSTAT_WRITE) {
                return (EACCES);
        }

        sp = ksp->ks_data;
        sp->fipe_enabled.value.i32 = fipe_gbl_ctrl.pm_enabled ? 1 : 0;
        sp->fipe_policy.value.i32 = fipe_pm_policy;

        hrt = fipe_gbl_ctrl.time_in_pm;
        scalehrtime(&hrt);
        sp->fipe_pm_time.value.ui64 = (uint64_t)hrt;

#ifdef FIPE_KSTAT_DETAIL
        sp->ioat_ready.value.i32 = fipe_ioat_ctrl.ioat_ready ? 1 : 0;
#endif /* FIPE_KSTAT_DETAIL */

        return (0);
}
#endif /* FIPE_KSTAT_SUPPORT */

/*
 * Initialize the memory power management subsystem.
 * Note: this function should only be called from ATTACH.
 * Note: the caller must ensure exclusive access to all fipe_xxx interfaces.
 */
int
fipe_init(dev_info_t *dip)
{
        size_t nsize;
        hrtime_t hrt;

        /* Initialize the global control structure. */
        bzero(&fipe_gbl_ctrl, sizeof (fipe_gbl_ctrl));
        mutex_init(&fipe_gbl_ctrl.lock, NULL, MUTEX_DRIVER, NULL);

        /* Query the power management policy from the device property. */
        fipe_pm_policy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
            FIPE_PROP_PM_POLICY, fipe_pm_policy);
        if (fipe_pm_policy < 0 || fipe_pm_policy >= FIPE_PM_POLICY_MAX) {
                cmn_err(CE_CONT,
                    "?fipe: invalid power management policy %d.\n",
                    fipe_pm_policy);
                fipe_pm_policy = FIPE_PM_POLICY_BALANCE;
        }
        fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

        /*
         * Compute the unscaled hrtime value corresponding to
         * FIPE_STAT_INTERVAL. (1 << 36) should be big enough here.
         */
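        /*
         * After scalehrtime(), hrt holds the number of nanoseconds in 2^36
         * unscaled ticks, so the expression below computes
         * FIPE_STAT_INTERVAL * 2^36 / hrt: the statistics interval converted
         * from nanoseconds back into unscaled hrtime ticks.
         */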
        hrt = 1ULL << 36;
        scalehrtime(&hrt);
        fipe_idle_ctrl.tick_interval = FIPE_STAT_INTERVAL * (1ULL << 36) / hrt;

        if (fipe_mc_init(dip) != 0) {
                cmn_err(CE_WARN, "!fipe: failed to initialize mc state.");
                goto out_mc_error;
        }
        if (fipe_ioat_init() != 0) {
                cmn_err(CE_NOTE, "!fipe: failed to initialize ioat state.");
                goto out_ioat_error;
        }

        /* Allocate the per-CPU state array. */
        nsize = max_ncpus * sizeof (fipe_cpu_state_t);
        nsize += CPU_CACHE_COHERENCE_SIZE;
        fipe_gbl_ctrl.state_buf = kmem_zalloc(nsize, KM_SLEEP);
        fipe_gbl_ctrl.state_size = nsize;
        fipe_cpu_states = (fipe_cpu_state_t *)P2ROUNDUP(
            (intptr_t)fipe_gbl_ctrl.state_buf, CPU_CACHE_COHERENCE_SIZE);

#ifdef FIPE_KSTAT_SUPPORT
        fipe_gbl_ctrl.fipe_kstat = kstat_create("fipe", 0, "fipe-pm", "misc",
            KSTAT_TYPE_NAMED, sizeof (fipe_kstat) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL);
        if (fipe_gbl_ctrl.fipe_kstat == NULL) {
                cmn_err(CE_CONT, "?fipe: failed to create kstat object.\n");
        } else {
                fipe_gbl_ctrl.fipe_kstat->ks_lock = &fipe_gbl_ctrl.lock;
                fipe_gbl_ctrl.fipe_kstat->ks_data = &fipe_kstat;
                fipe_gbl_ctrl.fipe_kstat->ks_update = fipe_kstat_update;
                kstat_install(fipe_gbl_ctrl.fipe_kstat);
        }
#endif /* FIPE_KSTAT_SUPPORT */

        return (0);

out_ioat_error:
        fipe_mc_fini();
out_mc_error:
        mutex_destroy(&fipe_gbl_ctrl.lock);
        bzero(&fipe_gbl_ctrl, sizeof (fipe_gbl_ctrl));

        return (-1);
}

/*
 * Destroy the memory power management subsystem.
 * Note: this function should only be called from DETACH.
 * Note: the caller must ensure exclusive access to all fipe_xxx interfaces.
 */
int
fipe_fini(void)
{
        if (fipe_gbl_ctrl.pm_enabled) {
                cmn_err(CE_NOTE,
                    "!fipe: fipe_fini() called without stopping PM.");
                return (EBUSY);
        }

        ASSERT(!fipe_gbl_ctrl.pm_active);
        fipe_ioat_fini();
        fipe_mc_fini();

#ifdef FIPE_KSTAT_SUPPORT
        if (fipe_gbl_ctrl.fipe_kstat != NULL) {
                kstat_delete(fipe_gbl_ctrl.fipe_kstat);
                fipe_gbl_ctrl.fipe_kstat = NULL;
        }
#endif /* FIPE_KSTAT_SUPPORT */

        if (fipe_gbl_ctrl.state_buf != NULL) {
                ASSERT(fipe_gbl_ctrl.state_size != 0);
                kmem_free(fipe_gbl_ctrl.state_buf, fipe_gbl_ctrl.state_size);
                fipe_cpu_states = NULL;
        }

        fipe_profile_curr = NULL;
        mutex_destroy(&fipe_gbl_ctrl.lock);
        bzero(&fipe_gbl_ctrl, sizeof (fipe_gbl_ctrl));

        return (0);
}

/*
 * Start the memory power management subsystem.
 * Note: the caller must ensure exclusive access to all fipe_xxx interfaces.
 */
int
fipe_start(void)
{
        if (fipe_gbl_ctrl.pm_enabled == B_TRUE) {
                return (0);
        }

        bzero(fipe_cpu_states, max_ncpus * sizeof (fipe_cpu_states[0]));
        fipe_ioat_alloc(NULL);
        if (fipe_idle_start() != 0) {
                cmn_err(CE_NOTE, "!fipe: failed to start PM subsystem.");
                fipe_ioat_free();
                return (-1);
        }

        fipe_gbl_ctrl.pm_enabled = B_TRUE;

        return (0);
}

/*
 * Stop the memory power management subsystem.
 * Note: the caller must ensure exclusive access to all fipe_xxx interfaces.
 */
int
fipe_stop(void)
{
        if (fipe_gbl_ctrl.pm_enabled) {
                if (fipe_idle_stop() != 0) {
                        cmn_err(CE_NOTE,
                            "!fipe: failed to stop PM subsystem.");
                        return (-1);
                }
                fipe_ioat_free();
                fipe_gbl_ctrl.pm_enabled = B_FALSE;
        }
        ASSERT(!fipe_gbl_ctrl.pm_active);

        return (0);
}

int
fipe_suspend(void)
{
        /* Save the current power management policy. */
        fipe_pm_policy_saved = fipe_pm_policy;
        /* Disable PM by switching to the FIPE_PM_POLICY_DISABLE profile. */
        fipe_pm_policy = FIPE_PM_POLICY_DISABLE;
        fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

        return (0);
}

int
fipe_resume(void)
{
        /* Restore the saved power management policy. */
        fipe_pm_policy = fipe_pm_policy_saved;
        fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

        return (0);
}

fipe_pm_policy_t
fipe_get_pmpolicy(void)
{
        return (fipe_pm_policy);
}

int
fipe_set_pmpolicy(fipe_pm_policy_t policy)
{
        if (policy < 0 || policy >= FIPE_PM_POLICY_MAX) {
                return (EINVAL);
        }
        fipe_pm_policy = policy;
        fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

        return (0);
}

/*
 * Check the condition (fipe_gbl_ctrl.cpu_count == ncpus) to make sure that
 * no other CPU is trying to wake the system up from the memory power-saving
 * state. If a CPU is waking up the system, fipe_disable() sets
 * fipe_gbl_ctrl.pm_active to false as soon as possible to let other CPUs
 * continue, and takes responsibility for recovering the system from the
 * memory power-saving state.
 */
static void
fipe_enable(int throttle, cpu_idle_check_wakeup_t check_func, void *check_arg)
{
        extern void membar_sync(void);

        FIPE_KSTAT_DETAIL_INC(pm_tryenter_cnt);

        /*
         * Check for CPU wakeup events.
         */
        if (check_func != NULL) {
                (*check_func)(check_arg);
        }

        /*
         * Try to acquire the mutex, which also implicitly has the same
         * effect as calling membar_sync().
         * If mutex_tryenter fails, that means another CPU is waking up.
         */
        if (mutex_tryenter(&fipe_gbl_ctrl.lock) == 0) {
                FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
        /*
         * Handle a special race condition for the case that a CPU wakes
         * up and then enters the idle state within a short period.
         * This case can't be reliably detected by the cpu_count mechanism.
         */
        } else if (fipe_gbl_ctrl.pm_active) {
                FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
                mutex_exit(&fipe_gbl_ctrl.lock);
        } else {
                fipe_gbl_ctrl.pm_active = B_TRUE;
                membar_sync();
                if (fipe_gbl_ctrl.cpu_count != ncpus) {
                        FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
                        fipe_gbl_ctrl.pm_active = B_FALSE;
                } else if (fipe_ioat_trigger() != 0) {
                        fipe_gbl_ctrl.pm_active = B_FALSE;
                } else if (fipe_gbl_ctrl.cpu_count != ncpus ||
                    fipe_mc_change(throttle) != 0) {
                        fipe_gbl_ctrl.pm_active = B_FALSE;
                        fipe_ioat_cancel();
                        if (fipe_gbl_ctrl.cpu_count != ncpus) {
                                FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
                        }
                } else if (fipe_gbl_ctrl.cpu_count != ncpus) {
                        fipe_gbl_ctrl.pm_active = B_FALSE;
                        fipe_mc_restore();
                        fipe_ioat_cancel();
                        FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
                } else {
                        FIPE_KSTAT_DETAIL_INC(pm_success_cnt);
                }
                mutex_exit(&fipe_gbl_ctrl.lock);
        }
}
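
/*
 * Note the repeated fipe_gbl_ctrl.cpu_count checks in fipe_enable() above:
 * another CPU may wake up at any point during the enable sequence, so the
 * count is re-validated after each step, and any steps already completed
 * are rolled back once the race is detected as lost.
 */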

static void
fipe_disable(void)
{
        /*
         * Try to acquire the lock, which also implicitly has the same
         * effect as calling membar_sync().
         */
        while (mutex_tryenter(&fipe_gbl_ctrl.lock) == 0) {
                /*
                 * If power saving is inactive, just return; all dirty
                 * housekeeping work will be handled in fipe_enable().
                 */
                if (fipe_gbl_ctrl.pm_active == B_FALSE) {
                        return;
                } else {
                        (void) SMT_PAUSE();
                }
        }

        /* Disable power saving if it's active. */
        if (fipe_gbl_ctrl.pm_active) {
                /*
                 * Set pm_active to FALSE as soon as possible to prevent
                 * other CPUs from waiting on the pm_active flag.
                 */
                fipe_gbl_ctrl.pm_active = B_FALSE;
                membar_producer();
                fipe_mc_restore();
                fipe_ioat_cancel();
        }

        mutex_exit(&fipe_gbl_ctrl.lock);
}

/*ARGSUSED*/
static boolean_t
fipe_check_cpu(struct fipe_cpu_state *sp, cpu_idle_callback_context_t ctx,
    hrtime_t ts)
{
        if (cpu_flagged_offline(CPU->cpu_flags)) {
                /* Treat an offline CPU as ready. */
                sp->cond_ready = B_TRUE;
                return (B_TRUE);
        } else if (sp->next_ts <= ts) {
                uint64_t intr;
                hrtime_t idle, busy, diff;
                cpu_idle_prop_value_t val;

                /* Set default values. */
                sp->cond_ready = B_TRUE;
                sp->idle_count = 0;

                /* Calculate the idle percentage. */
                idle = sp->last_idle;
                sp->last_idle = cpu_idle_prop_get_hrtime(
                    fipe_idle_ctrl.prop_idle, ctx);
                idle = sp->last_idle - idle;
                busy = sp->last_busy;
                sp->last_busy = cpu_idle_prop_get_hrtime(
                    fipe_idle_ctrl.prop_busy, ctx);
                busy = sp->last_busy - busy;
                /* Check the idle condition. */
                if (idle > 0 && busy > 0) {
                        if (busy * (100 - FIPE_PROF_BUSY_THRESHOLD) >
                            idle * FIPE_PROF_BUSY_THRESHOLD) {
                                FIPE_KSTAT_DETAIL_INC(cpu_busy_cnt);
                                sp->cond_ready = B_FALSE;
                        } else {
                                FIPE_KSTAT_DETAIL_INC(cpu_idle_cnt);
                        }
                } else {
                        FIPE_KSTAT_DETAIL_INC(cpu_busy_cnt);
                        sp->cond_ready = B_FALSE;
                }
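                /*
                 * The busy test above, busy * (100 - T) > idle * T with
                 * T = FIPE_PROF_BUSY_THRESHOLD, is the integer form of
                 * busy / (busy + idle) > T / 100; i.e., the CPU spent more
                 * than T percent of the last interval busy.
                 */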

                /* Calculate the interrupt count. */
                diff = sp->next_ts;
                sp->next_ts = ts + fipe_idle_ctrl.tick_interval;
                diff = sp->next_ts - diff;
                intr = sp->last_intr;
                if (cpu_idle_prop_get_value(fipe_idle_ctrl.prop_intr, ctx,
                    &val) == 0) {
                        sp->last_intr = val.cipv_uint64;
                        intr = sp->last_intr - intr;
                        if (diff != 0) {
                                intr = intr * fipe_idle_ctrl.tick_interval;
                                intr /= diff;
                        } else {
                                intr = FIPE_PROF_INTR_THRESHOLD;
                        }
                } else {
                        intr = FIPE_PROF_INTR_THRESHOLD;
                }

                /*
                 * The system is busy with interrupts, so disable all PM
                 * status checks for INTR_BUSY_THROTTLE ticks.
                 * Interrupts are disabled when the FIPE callbacks are
                 * called, so this optimization helps to reduce interrupt
                 * latency.
                 */
                if (intr >= FIPE_PROF_INTR_BUSY_THRESHOLD) {
                        FIPE_KSTAT_DETAIL_INC(cpu_intr_busy_cnt);
                        sp->throttle_ts = ts + FIPE_PROF_INTR_BUSY_THROTTLE *
                            fipe_idle_ctrl.tick_interval;
                        sp->cond_ready = B_FALSE;
                } else if (intr >= FIPE_PROF_INTR_THRESHOLD) {
                        FIPE_KSTAT_DETAIL_INC(cpu_intr_throttle_cnt);
                        sp->cond_ready = B_FALSE;
                }
        } else if (++sp->idle_count >= FIPE_PROF_IDLE_COUNT) {
                /* Too many idle enter/exit cycles in this tick. */
                FIPE_KSTAT_DETAIL_INC(cpu_loop_cnt);
                sp->throttle_ts = sp->next_ts + fipe_idle_ctrl.tick_interval;
                sp->idle_count = 0;
                sp->cond_ready = B_FALSE;
                return (B_FALSE);
        }

        return (sp->cond_ready);
}

/*ARGSUSED*/
static void
fipe_idle_enter(void *arg, cpu_idle_callback_context_t ctx,
    cpu_idle_check_wakeup_t check_func, void *check_arg)
{
        hrtime_t ts;
        uint32_t cnt;
        uint64_t iowait;
        cpu_t *cp = CPU;
        struct fipe_cpu_state *sp;

        sp = &fipe_cpu_states[cp->cpu_id];
        ts = cpu_idle_prop_get_hrtime(fipe_idle_ctrl.prop_enter, ctx);

        if (fipe_pm_policy != FIPE_PM_POLICY_DISABLE &&
            fipe_ioat_ctrl.ioat_ready &&
            sp->state_ready && sp->throttle_ts <= ts) {
                /* Adjust the iowait count for the local CPU. */
                iowait = CPU_STATS(cp, sys.iowait);
                if (iowait != sp->last_iowait) {
                        atomic_add_64(&fipe_gbl_ctrl.io_waiters,
                            iowait - sp->last_iowait);
                        sp->last_iowait = iowait;
                }

                /* Check the current CPU's status. */
                if (fipe_check_cpu(sp, ctx, ts)) {
                        /* Increase the count of CPUs ready for power saving. */
                        do {
                                cnt = fipe_gbl_ctrl.cpu_count;
                                ASSERT(cnt < ncpus);
                        } while (atomic_cas_32(&fipe_gbl_ctrl.cpu_count,
                            cnt, cnt + 1) != cnt);

                        /*
                         * Enable power saving if all CPUs are idle.
                         */
                        if (cnt + 1 == ncpus) {
                                if (fipe_gbl_ctrl.io_waiters == 0) {
                                        fipe_gbl_ctrl.enter_ts = ts;
                                        fipe_enable(fipe_pm_throttle_level,
                                            check_func, check_arg);
                                /* There are ongoing block I/O operations. */
                                } else {
                                        FIPE_KSTAT_DETAIL_INC(bio_busy_cnt);
                                }
                        }
                }
        } else if (fipe_pm_policy == FIPE_PM_POLICY_DISABLE ||
            fipe_ioat_ctrl.ioat_ready == B_FALSE) {
                if (sp->cond_ready == B_TRUE) {
                        sp->cond_ready = B_FALSE;
                }
        } else if (sp->state_ready == B_FALSE) {
                sp->cond_ready = B_FALSE;
                sp->state_ready = B_TRUE;
                sp->throttle_ts = 0;
                sp->next_ts = ts + fipe_idle_ctrl.tick_interval;
                sp->last_busy = cpu_idle_prop_get_hrtime(
                    fipe_idle_ctrl.prop_busy, ctx);
                sp->last_idle = cpu_idle_prop_get_hrtime(
                    fipe_idle_ctrl.prop_idle, ctx);
                sp->last_intr = cpu_idle_prop_get_uint64(
                    fipe_idle_ctrl.prop_intr, ctx);
                sp->idle_count = 0;
        }
}

/*ARGSUSED*/
static void
fipe_idle_exit(void *arg, cpu_idle_callback_context_t ctx, int flags)
{
        uint32_t cnt;
        hrtime_t ts;
        struct fipe_cpu_state *sp;

        sp = &fipe_cpu_states[CPU->cpu_id];
        if (sp->cond_ready) {
                do {
                        cnt = fipe_gbl_ctrl.cpu_count;
                        ASSERT(cnt > 0);
                } while (atomic_cas_32(&fipe_gbl_ctrl.cpu_count,
                    cnt, cnt - 1) != cnt);

                /*
                 * Try to disable the power-saving state.
                 * Only the first CPU waking from the idle state tries to
                 * disable the power-saving state; all other CPUs just go
                 * on and do not wait for memory to recover from the
                 * power-saving state.
                 * So there may be periods during which some CPUs are in
                 * the active state while memory is in the power-saving
                 * state. This is OK: it is an uncommon case, and it is
                 * better for performance to let them continue, as their
                 * blocking latency is smaller than a mutex and is only
                 * hit in this uncommon condition.
                 */
                if (cnt == ncpus) {
                        fipe_disable();
                        ts = cpu_idle_prop_get_hrtime(fipe_idle_ctrl.prop_exit,
                            ctx);
                        fipe_gbl_ctrl.time_in_pm += ts - fipe_gbl_ctrl.enter_ts;
                }
        }
}