xref: /linux/drivers/gpu/drm/imagination/pvr_power.c (revision 54fd6bd42e7bd351802ff1d193a2e33e4bfb1836)
1 // SPDX-License-Identifier: GPL-2.0-only OR MIT
2 /* Copyright (c) 2023 Imagination Technologies Ltd. */
3 
4 #include "pvr_device.h"
5 #include "pvr_fw.h"
6 #include "pvr_fw_startstop.h"
7 #include "pvr_power.h"
8 #include "pvr_queue.h"
9 #include "pvr_rogue_fwif.h"
10 
11 #include <drm/drm_drv.h>
12 #include <drm/drm_managed.h>
13 #include <linux/cleanup.h>
14 #include <linux/clk.h>
15 #include <linux/interrupt.h>
16 #include <linux/mutex.h>
17 #include <linux/of.h>
18 #include <linux/platform_device.h>
19 #include <linux/pm_domain.h>
20 #include <linux/pm_runtime.h>
21 #include <linux/reset.h>
22 #include <linux/timer.h>
23 #include <linux/types.h>
24 #include <linux/workqueue.h>
25 
/*
 * Upper bound, in microseconds (1 second), on how long pvr_power_send_command()
 * polls for the firmware to acknowledge a power command.
 */
#define POWER_SYNC_TIMEOUT_US (1000000)

/* Delay, in milliseconds, used each time the watchdog work item is queued. */
#define WATCHDOG_TIME_MS (500)
29 
30 /**
31  * pvr_device_lost() - Mark GPU device as lost
32  * @pvr_dev: Target PowerVR device.
33  *
34  * This will cause the DRM device to be unplugged.
35  */
36 void
37 pvr_device_lost(struct pvr_device *pvr_dev)
38 {
39 	if (!pvr_dev->lost) {
40 		pvr_dev->lost = true;
41 		drm_dev_unplug(from_pvr_device(pvr_dev));
42 	}
43 }
44 
45 static int
46 pvr_power_send_command(struct pvr_device *pvr_dev, struct rogue_fwif_kccb_cmd *pow_cmd)
47 {
48 	struct pvr_fw_device *fw_dev = &pvr_dev->fw_dev;
49 	u32 slot_nr;
50 	u32 value;
51 	int err;
52 
53 	WRITE_ONCE(*fw_dev->power_sync, 0);
54 
55 	err = pvr_kccb_send_cmd_powered(pvr_dev, pow_cmd, &slot_nr);
56 	if (err)
57 		return err;
58 
59 	/* Wait for FW to acknowledge. */
60 	return readl_poll_timeout(pvr_dev->fw_dev.power_sync, value, value != 0, 100,
61 				  POWER_SYNC_TIMEOUT_US);
62 }
63 
64 static int
65 pvr_power_request_idle(struct pvr_device *pvr_dev)
66 {
67 	struct rogue_fwif_kccb_cmd pow_cmd;
68 
69 	/* Send FORCED_IDLE request to FW. */
70 	pow_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_POW;
71 	pow_cmd.cmd_data.pow_data.pow_type = ROGUE_FWIF_POW_FORCED_IDLE_REQ;
72 	pow_cmd.cmd_data.pow_data.power_req_data.pow_request_type = ROGUE_FWIF_POWER_FORCE_IDLE;
73 
74 	return pvr_power_send_command(pvr_dev, &pow_cmd);
75 }
76 
77 static int
78 pvr_power_request_pwr_off(struct pvr_device *pvr_dev)
79 {
80 	struct rogue_fwif_kccb_cmd pow_cmd;
81 
82 	/* Send POW_OFF request to firmware. */
83 	pow_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_POW;
84 	pow_cmd.cmd_data.pow_data.pow_type = ROGUE_FWIF_POW_OFF_REQ;
85 	pow_cmd.cmd_data.pow_data.power_req_data.forced = true;
86 
87 	return pvr_power_send_command(pvr_dev, &pow_cmd);
88 }
89 
90 static int
91 pvr_power_fw_disable(struct pvr_device *pvr_dev, bool hard_reset)
92 {
93 	if (!hard_reset) {
94 		int err;
95 
96 		cancel_delayed_work_sync(&pvr_dev->watchdog.work);
97 
98 		err = pvr_power_request_idle(pvr_dev);
99 		if (err)
100 			return err;
101 
102 		err = pvr_power_request_pwr_off(pvr_dev);
103 		if (err)
104 			return err;
105 	}
106 
107 	return pvr_fw_stop(pvr_dev);
108 }
109 
110 static int
111 pvr_power_fw_enable(struct pvr_device *pvr_dev)
112 {
113 	int err;
114 
115 	err = pvr_fw_start(pvr_dev);
116 	if (err)
117 		return err;
118 
119 	err = pvr_wait_for_fw_boot(pvr_dev);
120 	if (err) {
121 		drm_err(from_pvr_device(pvr_dev), "Firmware failed to boot\n");
122 		pvr_fw_stop(pvr_dev);
123 		return err;
124 	}
125 
126 	queue_delayed_work(pvr_dev->sched_wq, &pvr_dev->watchdog.work,
127 			   msecs_to_jiffies(WATCHDOG_TIME_MS));
128 
129 	return 0;
130 }
131 
132 bool
133 pvr_power_is_idle(struct pvr_device *pvr_dev)
134 {
135 	/*
136 	 * FW power state can be out of date if a KCCB command has been submitted but the FW hasn't
137 	 * started processing it yet. So also check the KCCB status.
138 	 */
139 	enum rogue_fwif_pow_state pow_state = READ_ONCE(pvr_dev->fw_dev.fwif_sysdata->pow_state);
140 	bool kccb_idle = pvr_kccb_is_idle(pvr_dev);
141 
142 	return (pow_state == ROGUE_FWIF_POW_IDLE) && kccb_idle;
143 }
144 
145 static bool
146 pvr_watchdog_kccb_stalled(struct pvr_device *pvr_dev)
147 {
148 	/* Check KCCB commands are progressing. */
149 	u32 kccb_cmds_executed = pvr_dev->fw_dev.fwif_osdata->kccb_cmds_executed;
150 	bool kccb_is_idle = pvr_kccb_is_idle(pvr_dev);
151 
152 	if (pvr_dev->watchdog.old_kccb_cmds_executed == kccb_cmds_executed && !kccb_is_idle) {
153 		pvr_dev->watchdog.kccb_stall_count++;
154 
155 		/*
156 		 * If we have commands pending with no progress for 2 consecutive polls then
157 		 * consider KCCB command processing stalled.
158 		 */
159 		if (pvr_dev->watchdog.kccb_stall_count == 2) {
160 			pvr_dev->watchdog.kccb_stall_count = 0;
161 			return true;
162 		}
163 	} else if (pvr_dev->watchdog.old_kccb_cmds_executed == kccb_cmds_executed) {
164 		bool has_active_contexts;
165 
166 		mutex_lock(&pvr_dev->queues.lock);
167 		has_active_contexts = list_empty(&pvr_dev->queues.active);
168 		mutex_unlock(&pvr_dev->queues.lock);
169 
170 		if (has_active_contexts) {
171 			/* Send a HEALTH_CHECK command so we can verify FW is still alive. */
172 			struct rogue_fwif_kccb_cmd health_check_cmd;
173 
174 			health_check_cmd.cmd_type = ROGUE_FWIF_KCCB_CMD_HEALTH_CHECK;
175 
176 			pvr_kccb_send_cmd_powered(pvr_dev, &health_check_cmd, NULL);
177 		}
178 	} else {
179 		pvr_dev->watchdog.old_kccb_cmds_executed = kccb_cmds_executed;
180 		pvr_dev->watchdog.kccb_stall_count = 0;
181 	}
182 
183 	return false;
184 }
185 
186 static void
187 pvr_watchdog_worker(struct work_struct *work)
188 {
189 	struct pvr_device *pvr_dev = container_of(work, struct pvr_device,
190 						  watchdog.work.work);
191 	bool stalled;
192 
193 	if (pvr_dev->lost)
194 		return;
195 
196 	if (pm_runtime_get_if_in_use(from_pvr_device(pvr_dev)->dev) <= 0)
197 		goto out_requeue;
198 
199 	if (!pvr_dev->fw_dev.booted)
200 		goto out_pm_runtime_put;
201 
202 	stalled = pvr_watchdog_kccb_stalled(pvr_dev);
203 
204 	if (stalled) {
205 		drm_err(from_pvr_device(pvr_dev), "FW stalled, trying hard reset");
206 
207 		pvr_power_reset(pvr_dev, true);
208 		/* Device may be lost at this point. */
209 	}
210 
211 out_pm_runtime_put:
212 	pm_runtime_put(from_pvr_device(pvr_dev)->dev);
213 
214 out_requeue:
215 	if (!pvr_dev->lost) {
216 		queue_delayed_work(pvr_dev->sched_wq, &pvr_dev->watchdog.work,
217 				   msecs_to_jiffies(WATCHDOG_TIME_MS));
218 	}
219 }
220 
/**
 * pvr_watchdog_init() - Initialise watchdog for device
 * @pvr_dev: Target PowerVR device.
 *
 * Sets up the delayed work item that runs pvr_watchdog_worker(). The work is
 * only initialised here, not queued.
 *
 * Returns:
 *  * 0 always; this function currently has no failure path.
 */
int
pvr_watchdog_init(struct pvr_device *pvr_dev)
{
	INIT_DELAYED_WORK(&pvr_dev->watchdog.work, pvr_watchdog_worker);

	return 0;
}
236 
237 int
238 pvr_power_device_suspend(struct device *dev)
239 {
240 	struct platform_device *plat_dev = to_platform_device(dev);
241 	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
242 	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
243 	int err = 0;
244 	int idx;
245 
246 	if (!drm_dev_enter(drm_dev, &idx))
247 		return -EIO;
248 
249 	if (pvr_dev->fw_dev.booted) {
250 		err = pvr_power_fw_disable(pvr_dev, false);
251 		if (err)
252 			goto err_drm_dev_exit;
253 	}
254 
255 	clk_disable_unprepare(pvr_dev->mem_clk);
256 	clk_disable_unprepare(pvr_dev->sys_clk);
257 	clk_disable_unprepare(pvr_dev->core_clk);
258 
259 	err = reset_control_assert(pvr_dev->reset);
260 
261 err_drm_dev_exit:
262 	drm_dev_exit(idx);
263 
264 	return err;
265 }
266 
267 int
268 pvr_power_device_resume(struct device *dev)
269 {
270 	struct platform_device *plat_dev = to_platform_device(dev);
271 	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
272 	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
273 	int idx;
274 	int err;
275 
276 	if (!drm_dev_enter(drm_dev, &idx))
277 		return -EIO;
278 
279 	err = clk_prepare_enable(pvr_dev->core_clk);
280 	if (err)
281 		goto err_drm_dev_exit;
282 
283 	err = clk_prepare_enable(pvr_dev->sys_clk);
284 	if (err)
285 		goto err_core_clk_disable;
286 
287 	err = clk_prepare_enable(pvr_dev->mem_clk);
288 	if (err)
289 		goto err_sys_clk_disable;
290 
291 	/*
292 	 * According to the hardware manual, a delay of at least 32 clock
293 	 * cycles is required between de-asserting the clkgen reset and
294 	 * de-asserting the GPU reset. Assuming a worst-case scenario with
295 	 * a very high GPU clock frequency, a delay of 1 microsecond is
296 	 * sufficient to ensure this requirement is met across all
297 	 * feasible GPU clock speeds.
298 	 */
299 	udelay(1);
300 
301 	err = reset_control_deassert(pvr_dev->reset);
302 	if (err)
303 		goto err_mem_clk_disable;
304 
305 	if (pvr_dev->fw_dev.booted) {
306 		err = pvr_power_fw_enable(pvr_dev);
307 		if (err)
308 			goto err_reset_assert;
309 	}
310 
311 	drm_dev_exit(idx);
312 
313 	return 0;
314 
315 err_reset_assert:
316 	reset_control_assert(pvr_dev->reset);
317 
318 err_mem_clk_disable:
319 	clk_disable_unprepare(pvr_dev->mem_clk);
320 
321 err_sys_clk_disable:
322 	clk_disable_unprepare(pvr_dev->sys_clk);
323 
324 err_core_clk_disable:
325 	clk_disable_unprepare(pvr_dev->core_clk);
326 
327 err_drm_dev_exit:
328 	drm_dev_exit(idx);
329 
330 	return err;
331 }
332 
333 int
334 pvr_power_device_idle(struct device *dev)
335 {
336 	struct platform_device *plat_dev = to_platform_device(dev);
337 	struct drm_device *drm_dev = platform_get_drvdata(plat_dev);
338 	struct pvr_device *pvr_dev = to_pvr_device(drm_dev);
339 
340 	return pvr_power_is_idle(pvr_dev) ? 0 : -EBUSY;
341 }
342 
343 static int
344 pvr_power_clear_error(struct pvr_device *pvr_dev)
345 {
346 	struct device *dev = from_pvr_device(pvr_dev)->dev;
347 	int err;
348 
349 	/* Ensure the device state is known and nothing is happening past this point */
350 	pm_runtime_disable(dev);
351 
352 	/* Attempt to clear the runtime PM error by setting the current state again */
353 	if (pm_runtime_status_suspended(dev))
354 		err = pm_runtime_set_suspended(dev);
355 	else
356 		err = pm_runtime_set_active(dev);
357 
358 	if (err) {
359 		drm_err(from_pvr_device(pvr_dev),
360 			"%s: Failed to clear runtime PM error (new error %d)\n",
361 			__func__, err);
362 	}
363 
364 	pm_runtime_enable(dev);
365 
366 	return err;
367 }
368 
/**
 * pvr_power_get_clear() - Acquire a power reference, correcting any errors
 * @pvr_dev: Device pointer
 *
 * Try to take a power reference with pvr_power_get(). If that fails, log a
 * warning, attempt to clear the runtime PM error state and try exactly once
 * more.
 *
 * Returns:
 *  * 0 on success, or
 *  * Any error code returned by pvr_power_get() or the runtime PM API.
 */
static int
pvr_power_get_clear(struct pvr_device *pvr_dev)
{
	int ret = pvr_power_get(pvr_dev);

	if (!ret)
		return 0;

	drm_warn(from_pvr_device(pvr_dev),
		 "%s: pvr_power_get returned error %d, attempting recovery\n",
		 __func__, ret);

	ret = pvr_power_clear_error(pvr_dev);

	/* Only retry when the error state could actually be cleared. */
	return ret ? ret : pvr_power_get(pvr_dev);
}
399 
400 /**
401  * pvr_power_reset() - Reset the GPU
402  * @pvr_dev: Device pointer
403  * @hard_reset: %true for hard reset, %false for soft reset
404  *
405  * If @hard_reset is %false and the FW processor fails to respond during the reset process, this
406  * function will attempt a hard reset.
407  *
408  * If a hard reset fails then the GPU device is reported as lost.
409  *
410  * Returns:
411  *  * 0 on success, or
412  *  * Any error code returned by pvr_power_get, pvr_power_fw_disable or pvr_power_fw_enable().
413  */
414 int
415 pvr_power_reset(struct pvr_device *pvr_dev, bool hard_reset)
416 {
417 	bool queues_disabled = false;
418 	int err;
419 
420 	/*
421 	 * Take a power reference during the reset. This should prevent any interference with the
422 	 * power state during reset.
423 	 */
424 	WARN_ON(pvr_power_get_clear(pvr_dev));
425 
426 	down_write(&pvr_dev->reset_sem);
427 
428 	if (pvr_dev->lost) {
429 		err = -EIO;
430 		goto err_up_write;
431 	}
432 
433 	/* Disable IRQs for the duration of the reset. */
434 	disable_irq(pvr_dev->irq);
435 
436 	do {
437 		if (hard_reset) {
438 			pvr_queue_device_pre_reset(pvr_dev);
439 			queues_disabled = true;
440 		}
441 
442 		err = pvr_power_fw_disable(pvr_dev, hard_reset);
443 		if (!err) {
444 			if (hard_reset) {
445 				pvr_dev->fw_dev.booted = false;
446 				WARN_ON(pvr_power_device_suspend(from_pvr_device(pvr_dev)->dev));
447 
448 				err = pvr_fw_hard_reset(pvr_dev);
449 				if (err)
450 					goto err_device_lost;
451 
452 				err = pvr_power_device_resume(from_pvr_device(pvr_dev)->dev);
453 				pvr_dev->fw_dev.booted = true;
454 				if (err)
455 					goto err_device_lost;
456 			} else {
457 				/* Clear the FW faulted flags. */
458 				pvr_dev->fw_dev.fwif_sysdata->hwr_state_flags &=
459 					~(ROGUE_FWIF_HWR_FW_FAULT |
460 					  ROGUE_FWIF_HWR_RESTART_REQUESTED);
461 			}
462 
463 			pvr_fw_irq_clear(pvr_dev);
464 
465 			err = pvr_power_fw_enable(pvr_dev);
466 		}
467 
468 		if (err && hard_reset)
469 			goto err_device_lost;
470 
471 		if (err && !hard_reset) {
472 			drm_err(from_pvr_device(pvr_dev), "FW stalled, trying hard reset");
473 			hard_reset = true;
474 		}
475 	} while (err);
476 
477 	if (queues_disabled)
478 		pvr_queue_device_post_reset(pvr_dev);
479 
480 	enable_irq(pvr_dev->irq);
481 
482 	up_write(&pvr_dev->reset_sem);
483 
484 	pvr_power_put(pvr_dev);
485 
486 	return 0;
487 
488 err_device_lost:
489 	drm_err(from_pvr_device(pvr_dev), "GPU device lost");
490 	pvr_device_lost(pvr_dev);
491 
492 	/* Leave IRQs disabled if the device is lost. */
493 
494 	if (queues_disabled)
495 		pvr_queue_device_post_reset(pvr_dev);
496 
497 err_up_write:
498 	up_write(&pvr_dev->reset_sem);
499 
500 	pvr_power_put(pvr_dev);
501 
502 	return err;
503 }
504 
/**
 * pvr_watchdog_fini() - Shutdown watchdog for device
 * @pvr_dev: Target PowerVR device.
 *
 * Cancels the watchdog delayed work synchronously, i.e. waits for the work
 * item to finish if it is currently running.
 */
void
pvr_watchdog_fini(struct pvr_device *pvr_dev)
{
	cancel_delayed_work_sync(&pvr_dev->watchdog.work);
}
514 
515 int pvr_power_domains_init(struct pvr_device *pvr_dev)
516 {
517 	struct device *dev = from_pvr_device(pvr_dev)->dev;
518 
519 	struct device_link **domain_links __free(kfree) = NULL;
520 	struct device **domain_devs __free(kfree) = NULL;
521 	int domain_count;
522 	int link_count;
523 
524 	char dev_name[2] = "a";
525 	int err;
526 	int i;
527 
528 	domain_count = of_count_phandle_with_args(dev->of_node, "power-domains",
529 						  "#power-domain-cells");
530 	if (domain_count < 0)
531 		return domain_count;
532 
533 	if (domain_count <= 1)
534 		return 0;
535 
536 	link_count = domain_count + (domain_count - 1);
537 
538 	domain_devs = kcalloc(domain_count, sizeof(*domain_devs), GFP_KERNEL);
539 	if (!domain_devs)
540 		return -ENOMEM;
541 
542 	domain_links = kcalloc(link_count, sizeof(*domain_links), GFP_KERNEL);
543 	if (!domain_links)
544 		return -ENOMEM;
545 
546 	for (i = 0; i < domain_count; i++) {
547 		struct device *domain_dev;
548 
549 		dev_name[0] = 'a' + i;
550 		domain_dev = dev_pm_domain_attach_by_name(dev, dev_name);
551 		if (IS_ERR_OR_NULL(domain_dev)) {
552 			err = domain_dev ? PTR_ERR(domain_dev) : -ENODEV;
553 			goto err_detach;
554 		}
555 
556 		domain_devs[i] = domain_dev;
557 	}
558 
559 	for (i = 0; i < domain_count; i++) {
560 		struct device_link *link;
561 
562 		link = device_link_add(dev, domain_devs[i], DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
563 		if (!link) {
564 			err = -ENODEV;
565 			goto err_unlink;
566 		}
567 
568 		domain_links[i] = link;
569 	}
570 
571 	for (i = domain_count; i < link_count; i++) {
572 		struct device_link *link;
573 
574 		link = device_link_add(domain_devs[i - domain_count + 1],
575 				       domain_devs[i - domain_count],
576 				       DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
577 		if (!link) {
578 			err = -ENODEV;
579 			goto err_unlink;
580 		}
581 
582 		domain_links[i] = link;
583 	}
584 
585 	pvr_dev->power = (struct pvr_device_power){
586 		.domain_devs = no_free_ptr(domain_devs),
587 		.domain_links = no_free_ptr(domain_links),
588 		.domain_count = domain_count,
589 	};
590 
591 	return 0;
592 
593 err_unlink:
594 	while (--i >= 0)
595 		device_link_del(domain_links[i]);
596 
597 	i = domain_count;
598 
599 err_detach:
600 	while (--i >= 0)
601 		dev_pm_domain_detach(domain_devs[i], true);
602 
603 	return err;
604 }
605 
606 void pvr_power_domains_fini(struct pvr_device *pvr_dev)
607 {
608 	const int domain_count = pvr_dev->power.domain_count;
609 
610 	int i = domain_count + (domain_count - 1);
611 
612 	while (--i >= 0)
613 		device_link_del(pvr_dev->power.domain_links[i]);
614 
615 	i = domain_count;
616 
617 	while (--i >= 0)
618 		dev_pm_domain_detach(pvr_dev->power.domain_devs[i], true);
619 
620 	kfree(pvr_dev->power.domain_links);
621 	kfree(pvr_dev->power.domain_devs);
622 
623 	pvr_dev->power = (struct pvr_device_power){ 0 };
624 }
625