1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2017-2019 The Linux Foundation. All rights reserved. */
3
4 #include <linux/bitfield.h>
5 #include <linux/clk.h>
6 #include <linux/interconnect.h>
7 #include <linux/of_platform.h>
8 #include <linux/platform_device.h>
9 #include <linux/pm_domain.h>
10 #include <linux/pm_opp.h>
11 #include <soc/qcom/cmd-db.h>
12 #include <soc/qcom/tcs.h>
13 #include <drm/drm_gem.h>
14
15 #include "a6xx_gpu.h"
16 #include "a6xx_gmu.xml.h"
17 #include "msm_gem.h"
18 #include "msm_gpu_trace.h"
19 #include "msm_mmu.h"
20
21 static void a6xx_gmu_fault(struct a6xx_gmu *gmu)
22 {
23 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
24 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
25 struct msm_gpu *gpu = &adreno_gpu->base;
26
27 /* FIXME: add a banner here */
28 gmu->hung = true;
29
30 /* Turn off the hangcheck timer while we are resetting */
31 timer_delete(&gpu->hangcheck_timer);
32
33 /* Queue the GPU handler because we need to treat this as a recovery */
34 kthread_queue_work(gpu->worker, &gpu->recover_work);
35 }
36
37 static irqreturn_t a6xx_gmu_irq(int irq, void *data)
38 {
39 struct a6xx_gmu *gmu = data;
40 u32 status;
41
42 status = gmu_read(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_STATUS);
43 gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, status);
44
45 if (status & A6XX_GMU_AO_HOST_INTERRUPT_STATUS_WDOG_BITE) {
46 dev_err_ratelimited(gmu->dev, "GMU watchdog expired\n");
47
48 a6xx_gmu_fault(gmu);
49 }
50
51 if (status & A6XX_GMU_AO_HOST_INTERRUPT_STATUS_HOST_AHB_BUS_ERROR)
52 dev_err_ratelimited(gmu->dev, "GMU AHB bus error\n");
53
54 if (status & A6XX_GMU_AO_HOST_INTERRUPT_STATUS_FENCE_ERR)
55 dev_err_ratelimited(gmu->dev, "GMU fence error: 0x%x\n",
56 gmu_read(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS));
57
58 return IRQ_HANDLED;
59 }
60
61 static irqreturn_t a6xx_hfi_irq(int irq, void *data)
62 {
63 struct a6xx_gmu *gmu = data;
64 u32 status;
65
66 status = gmu_read(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO);
67 gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, status);
68
69 if (status & A6XX_GMU_GMU2HOST_INTR_INFO_CM3_FAULT) {
70 dev_err_ratelimited(gmu->dev, "GMU firmware fault\n");
71
72 a6xx_gmu_fault(gmu);
73 }
74
75 return IRQ_HANDLED;
76 }
77
78 bool a6xx_gmu_sptprac_is_on(struct a6xx_gmu *gmu)
79 {
80 u32 val;
81
82 /* This can be called from gpu state code so make sure GMU is valid */
83 if (!gmu->initialized)
84 return false;
85
86 val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
87
88 return !(val &
89 (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SPTPRAC_GDSC_POWER_OFF |
90 A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_SP_CLOCK_OFF));
91 }
92
93 /* Check to see if the GX rail is still powered */
94 bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu)
95 {
96 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
97 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
98 u32 val;
99
100 /* This can be called from gpu state code so make sure GMU is valid */
101 if (!gmu->initialized)
102 return false;
103
104 /* If GMU is absent, then GX power domain is ON as long as GPU is in active state */
105 if (adreno_has_gmu_wrapper(adreno_gpu))
106 return true;
107
108 val = gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS);
109
110 if (adreno_is_a7xx(adreno_gpu))
111 return !(val &
112 (A7XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF |
113 A7XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
114
115 return !(val &
116 (A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_GDSC_POWER_OFF |
117 A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF));
118 }
119
120 void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp,
121 bool suspended)
122 {
123 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
124 const struct a6xx_info *info = adreno_gpu->info->a6xx;
125 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
126 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
127 u32 perf_index;
128 u32 bw_index = 0;
129 unsigned long gpu_freq;
130 int ret = 0;
131
132 gpu_freq = dev_pm_opp_get_freq(opp);
133
134 if (gpu_freq == gmu->freq)
135 return;
136
137 for (perf_index = 0; perf_index < gmu->nr_gpu_freqs - 1; perf_index++)
138 if (gpu_freq == gmu->gpu_freqs[perf_index])
139 break;
140
141 /* If enabled, find the corresponding DDR bandwidth index */
142 if (info->bcms && gmu->nr_gpu_bws > 1) {
143 unsigned int bw = dev_pm_opp_get_bw(opp, true, 0);
144
145 for (bw_index = 0; bw_index < gmu->nr_gpu_bws - 1; bw_index++) {
146 if (bw == gmu->gpu_bw_table[bw_index])
147 break;
148 }
149
150 /* Vote AB as a fraction of the max bandwidth, starting from A750 */
151 if (bw && adreno_is_a750_family(adreno_gpu)) {
152 u64 tmp;
153
154 /* For now, vote for 25% of the bandwidth */
155 tmp = bw * 25;
156 do_div(tmp, 100);
157
158 /*
159 * The AB vote consists of a 16 bit wide quantized level
160 * against the maximum supported bandwidth.
161 * Quantization can be calculated as below:
162 * vote = (bandwidth * 2^16) / max bandwidth
163 */
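/*
 * Rough worked example (hypothetical numbers): with a max bandwidth of
 * 16,000,000 KBps, the 25% request on an 8,000,000 KBps OPP is
 * 2,000,000 KBps, which quantizes to (2,000,000 * 2^16) / 16,000,000
 * ~= 8192, i.e. 12.5% of the 16 bit scale.
 */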
164 tmp *= MAX_AB_VOTE;
165 do_div(tmp, gmu->gpu_bw_table[gmu->nr_gpu_bws - 1]);
166
167 bw_index |= AB_VOTE(clamp(tmp, 1, MAX_AB_VOTE));
168 bw_index |= AB_VOTE_ENABLE;
169 }
170 }
171
172 gmu->current_perf_index = perf_index;
173 gmu->freq = gmu->gpu_freqs[perf_index];
174
175 trace_msm_gmu_freq_change(gmu->freq, perf_index);
176
177 /*
178 * This can get called from devfreq while the hardware is idle. Don't
179 * bring up the power if it isn't already active. All we're doing here
180 * is updating the frequency so that when we come back online we're at
181 * the right rate.
182 */
183 if (suspended)
184 return;
185
186 if (!gmu->legacy) {
187 a6xx_hfi_set_freq(gmu, perf_index, bw_index);
188 /* With Bandwidth voting, we now vote for all resources, so skip OPP set */
189 if (!bw_index)
190 dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
191 return;
192 }
193
194 gmu_write(gmu, REG_A6XX_GMU_DCVS_ACK_OPTION, 0);
195
196 gmu_write(gmu, REG_A6XX_GMU_DCVS_PERF_SETTING,
197 ((3 & 0xf) << 28) | perf_index);
198
199 /*
200 * Send an invalid index as a vote for the bus bandwidth and let the
201 * firmware decide on the right vote
202 */
203 gmu_write(gmu, REG_A6XX_GMU_DCVS_BW_SETTING, 0xff);
204
205 /* Set and clear the OOB for DCVS to trigger the GMU */
206 a6xx_gmu_set_oob(gmu, GMU_OOB_DCVS_SET);
207 a6xx_gmu_clear_oob(gmu, GMU_OOB_DCVS_SET);
208
209 ret = gmu_read(gmu, REG_A6XX_GMU_DCVS_RETURN);
210 if (ret)
211 dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret);
212
213 dev_pm_opp_set_opp(&gpu->pdev->dev, opp);
214 }
215
216 unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu)
217 {
218 struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
219 struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
220 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
221
222 return gmu->freq;
223 }
224
225 static bool a6xx_gmu_check_idle_level(struct a6xx_gmu *gmu)
226 {
227 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
228 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
229 int local = gmu->idle_level;
230 u32 val;
231
232 /* SPTP and IFPC both report as IFPC */
233 if (gmu->idle_level == GMU_IDLE_STATE_SPTP)
234 local = GMU_IDLE_STATE_IFPC;
235
236 if (adreno_is_a8xx(adreno_gpu))
237 val = gmu_read(gmu, REG_A8XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE);
238 else
239 val = gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE);
240
241 if (val == local) {
242 if (gmu->idle_level != GMU_IDLE_STATE_IFPC ||
243 !a6xx_gmu_gx_is_on(gmu))
244 return true;
245 }
246
247 return false;
248 }
249
250 /* Wait for the GMU to get to its most idle state */
251 int a6xx_gmu_wait_for_idle(struct a6xx_gmu *gmu)
252 {
253 return spin_until(a6xx_gmu_check_idle_level(gmu));
254 }
255
256 static int a6xx_gmu_start(struct a6xx_gmu *gmu)
257 {
258 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
259 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
260 u32 mask, reset_val, val;
261 int ret;
262
263 val = gmu_read(gmu, REG_A6XX_GMU_CM3_DTCM_START + 0xff8);
264 if (val <= 0x20010004) {
265 mask = 0xffffffff;
266 reset_val = 0xbabeface;
267 } else {
268 mask = 0x1ff;
269 reset_val = 0x100;
270 }
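/*
 * The last DTCM word holds the GMU firmware version: firmware at or below
 * 0x20010004 reports the 0xbabeface boot magic in FW_INIT_RESULT, while
 * newer firmware reports a status word where bit 8 signals boot completion,
 * hence the narrower mask.
 */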
271
272 gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 1);
273
274 /* Set the log wptr index
275 * note: downstream saves the value in poweroff and restores it here
276 */
277 if (adreno_is_a8xx(adreno_gpu))
278 gmu_write(gmu, REG_A8XX_GMU_GENERAL_9, 0);
279 else if (adreno_is_a7xx(adreno_gpu))
280 gmu_write(gmu, REG_A7XX_GMU_GENERAL_9, 0);
281 else
282 gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_RESP, 0);
283
285 gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 0);
286
287 ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_CM3_FW_INIT_RESULT, val,
288 (val & mask) == reset_val, 100, 10000);
289
290 if (ret)
291 DRM_DEV_ERROR(gmu->dev, "GMU firmware initialization timed out\n");
292
293 set_bit(GMU_STATUS_FW_START, &gmu->status);
294
295 return ret;
296 }
297
298 static int a6xx_gmu_hfi_start(struct a6xx_gmu *gmu)
299 {
300 u32 val;
301 int ret;
302
303 gmu_write(gmu, REG_A6XX_GMU_HFI_CTRL_INIT, 1);
304
305 ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_HFI_CTRL_STATUS, val,
306 val & 1, 100, 10000);
307 if (ret)
308 DRM_DEV_ERROR(gmu->dev, "Unable to start the HFI queues\n");
309
310 return ret;
311 }
312
313 struct a6xx_gmu_oob_bits {
314 int set, ack, set_new, ack_new, clear, clear_new;
315 const char *name;
316 };
317
318 /* These are the interrupt / ack bits for each OOB request that are set
319 * in a6xx_gmu_set_oob and a6xx_gmu_clear_oob
320 */
321 static const struct a6xx_gmu_oob_bits a6xx_gmu_oob_bits[] = {
322 [GMU_OOB_GPU_SET] = {
323 .name = "GPU_SET",
324 .set = 16,
325 .ack = 24,
326 .set_new = 30,
327 .ack_new = 31,
328 .clear = 24,
329 .clear_new = 31,
330 },
331
332 [GMU_OOB_PERFCOUNTER_SET] = {
333 .name = "PERFCOUNTER",
334 .set = 17,
335 .ack = 25,
336 .set_new = 28,
337 .ack_new = 30,
338 .clear = 25,
339 .clear_new = 29,
340 },
341
342 [GMU_OOB_BOOT_SLUMBER] = {
343 .name = "BOOT_SLUMBER",
344 .set = 22,
345 .ack = 30,
346 .clear = 30,
347 },
348
349 [GMU_OOB_DCVS_SET] = {
350 .name = "GPU_DCVS",
351 .set = 23,
352 .ack = 31,
353 .clear = 31,
354 },
355 };
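/*
 * For example, with the legacy GPU_SET request a6xx_gmu_set_oob() writes
 * BIT(16) to HOST2GMU_INTR_SET, waits for BIT(24) in GMU2HOST_INTR_INFO
 * and acks it via GMU2HOST_INTR_CLR; a6xx_gmu_clear_oob() then writes
 * BIT(24) to HOST2GMU_INTR_SET to drop the request.
 */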
356
357 /* Trigger an OOB (out of band) request to the GMU */
358 int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
359 {
360 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
361 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
362 int ret;
363 u32 val;
364 int request, ack;
365
366 WARN_ON_ONCE(!mutex_is_locked(&gmu->lock));
367
368 /* Skip OOB calls since RGMU is not enabled */
369 if (adreno_has_rgmu(adreno_gpu))
370 return 0;
371
372 if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits))
373 return -EINVAL;
374
375 if (gmu->legacy) {
376 request = a6xx_gmu_oob_bits[state].set;
377 ack = a6xx_gmu_oob_bits[state].ack;
378 } else {
379 request = a6xx_gmu_oob_bits[state].set_new;
380 ack = a6xx_gmu_oob_bits[state].ack_new;
381 if (!request || !ack) {
382 DRM_DEV_ERROR(gmu->dev,
383 "Invalid non-legacy GMU request %s\n",
384 a6xx_gmu_oob_bits[state].name);
385 return -EINVAL;
386 }
387 }
388
389 /* Trigger the requested OOB operation */
390 gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 1 << request);
391
392 do {
393 /* Wait for the acknowledge interrupt */
394 ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO, val,
395 val & (1 << ack), 100, 10000);
396
397 if (!ret)
398 break;
399
400 if (completion_done(&a6xx_gpu->base.fault_coredump_done))
401 break;
402
403 /* We may time out because the GMU is temporarily wedged by
404 * pending faults from the GPU and we are taking a devcoredump.
405 * Wait until the MMU is resumed and try again.
406 */
407 wait_for_completion(&a6xx_gpu->base.fault_coredump_done);
408 } while (true);
409
410 if (ret)
411 DRM_DEV_ERROR(gmu->dev,
412 "Timeout waiting for GMU OOB set %s: 0x%x\n",
413 a6xx_gmu_oob_bits[state].name,
414 gmu_read(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO));
415
416 /* Clear the acknowledge interrupt */
417 gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, 1 << ack);
418
419 return ret;
420 }
421
422 /* Clear a pending OOB state in the GMU */
423 void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
424 {
425 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
426 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
427 int bit;
428
429 WARN_ON_ONCE(!mutex_is_locked(&gmu->lock));
430
431 /* Skip OOB calls since RGMU is not enabled */
432 if (adreno_has_rgmu(adreno_gpu))
433 return;
434
435 if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits))
436 return;
437
438 if (gmu->legacy)
439 bit = a6xx_gmu_oob_bits[state].clear;
440 else
441 bit = a6xx_gmu_oob_bits[state].clear_new;
442
443 gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 1 << bit);
444 }
445
446 /* Enable CPU control of SPTP power collapse */
447 int a6xx_sptprac_enable(struct a6xx_gmu *gmu)
448 {
449 int ret;
450 u32 val;
451
452 WARN_ON(!gmu->legacy);
453
454 /* Nothing to do if GMU does the power management */
455 if (gmu->idle_level > GMU_IDLE_STATE_ACTIVE)
456 return 0;
457
458 gmu_write(gmu, REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL, 0x778000);
459
460 ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, val,
461 (val & 0x38) == 0x28, 1, 100);
462
463 if (ret) {
464 DRM_DEV_ERROR(gmu->dev, "Unable to power on SPTPRAC: 0x%x\n",
465 gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS));
466 }
467
468 return 0;
469 }
470
471 /* Disable CPU control of SPTP power collapse */
472 void a6xx_sptprac_disable(struct a6xx_gmu *gmu)
473 {
474 u32 val;
475 int ret;
476
477 if (!gmu->legacy)
478 return;
479
480 /* Make sure retention is on */
481 gmu_rmw(gmu, REG_A6XX_GPU_CC_GX_GDSCR, 0, (1 << 11));
482
483 gmu_write(gmu, REG_A6XX_GMU_GX_SPTPRAC_POWER_CONTROL, 0x778001);
484
485 ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS, val,
486 (val & 0x04), 100, 10000);
487
488 if (ret)
489 DRM_DEV_ERROR(gmu->dev, "failed to power off SPTPRAC: 0x%x\n",
490 gmu_read(gmu, REG_A6XX_GMU_SPTPRAC_PWR_CLK_STATUS));
491 }
492
493 /* Let the GMU know we are starting a boot sequence */
494 static int a6xx_gmu_gfx_rail_on(struct a6xx_gmu *gmu)
495 {
496 u32 vote;
497
498 /* Let the GMU know we are getting ready for boot */
499 gmu_write(gmu, REG_A6XX_GMU_BOOT_SLUMBER_OPTION, 0);
500
501 /* Choose the "default" power level as the highest available */
502 vote = gmu->gx_arc_votes[gmu->nr_gpu_freqs - 1];
503
504 gmu_write(gmu, REG_A6XX_GMU_GX_VOTE_IDX, vote & 0xff);
505 gmu_write(gmu, REG_A6XX_GMU_MX_VOTE_IDX, (vote >> 8) & 0xff);
506
507 /* Let the GMU know the boot sequence has started */
508 return a6xx_gmu_set_oob(gmu, GMU_OOB_BOOT_SLUMBER);
509 }
510
511 static void a6xx_gemnoc_workaround(struct a6xx_gmu *gmu)
512 {
513 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
514 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
515
516 /*
517 * GEMNoC can power collapse whilst the GPU is being powered down, resulting
518 * in the power down sequence not being fully executed. That in turn can
519 * prevent CX_GDSC from collapsing. Assert Qactive to avoid this.
520 */
521 if (adreno_is_a8xx(adreno_gpu))
522 gmu_write(gmu, REG_A8XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, BIT(0));
523 else if (adreno_is_a7xx(adreno_gpu) || adreno_is_a621(adreno_gpu) ||
524 adreno_is_7c3(adreno_gpu))
525 gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, BIT(0));
526 }
527
528 /* Let the GMU know that we are about to go into slumber */
529 static int a6xx_gmu_notify_slumber(struct a6xx_gmu *gmu)
530 {
531 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
532 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
533 int ret;
534
535 /* Disable the power counter so the GMU isn't busy */
536 if (adreno_is_a8xx(adreno_gpu))
537 gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0);
538 else
539 gmu_write(gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 0);
540
541 /* Disable SPTP_PC if the CPU is responsible for it */
542 if (gmu->idle_level < GMU_IDLE_STATE_SPTP)
543 a6xx_sptprac_disable(gmu);
544
545 if (!gmu->legacy) {
546 ret = a6xx_hfi_send_prep_slumber(gmu);
547 goto out;
548 }
549
550 /* Tell the GMU to get ready to slumber */
551 gmu_write(gmu, REG_A6XX_GMU_BOOT_SLUMBER_OPTION, 1);
552
553 ret = a6xx_gmu_set_oob(gmu, GMU_OOB_BOOT_SLUMBER);
554 a6xx_gmu_clear_oob(gmu, GMU_OOB_BOOT_SLUMBER);
555
556 if (!ret) {
557 /* Check to see if the GMU really did slumber */
558 if (gmu_read(gmu, REG_A6XX_GPU_GMU_CX_GMU_RPMH_POWER_STATE)
559 != 0x0f) {
560 DRM_DEV_ERROR(gmu->dev, "The GMU did not go into slumber\n");
561 ret = -ETIMEDOUT;
562 }
563 }
564
565 out:
566 /* Put fence into allow mode */
567 gmu_write(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
568 a6xx_gemnoc_workaround(gmu);
569 return ret;
570 }
571
572 static int a6xx_rpmh_start(struct a6xx_gmu *gmu)
573 {
574 int ret;
575 u32 val;
576
577 if (!test_and_clear_bit(GMU_STATUS_PDC_SLEEP, &gmu->status))
578 return 0;
579
580 gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, BIT(1));
581
582 ret = gmu_poll_timeout(gmu, REG_A6XX_GMU_RSCC_CONTROL_ACK, val,
583 val & (1 << 1), 100, 10000);
584 if (ret) {
585 DRM_DEV_ERROR(gmu->dev, "Unable to power on the GPU RSC\n");
586 return ret;
587 }
588
589 ret = gmu_poll_timeout_rscc(gmu, REG_A6XX_RSCC_SEQ_BUSY_DRV0, val,
590 !val, 100, 10000);
591
592 if (ret) {
593 DRM_DEV_ERROR(gmu->dev, "GPU RSC sequence stuck while waking up the GPU\n");
594 return ret;
595 }
596
597 gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
598
599 return 0;
600 }
601
602 static void a6xx_rpmh_stop(struct a6xx_gmu *gmu)
603 {
604 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
605 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
606 u32 bitmask = BIT(16);
607 int ret;
608 u32 val;
609
610 if (test_and_clear_bit(GMU_STATUS_FW_START, &gmu->status))
611 return;
612
613 if (adreno_is_a840(adreno_gpu))
614 bitmask = BIT(30);
615
616 gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 1);
617
618 ret = gmu_poll_timeout_rscc(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0,
619 val, val & bitmask, 100, 10000);
620 if (ret)
621 DRM_DEV_ERROR(gmu->dev, "Unable to power off the GPU RSC\n");
622
623 gmu_write(gmu, REG_A6XX_GMU_RSCC_CONTROL_REQ, 0);
624
625 set_bit(GMU_STATUS_PDC_SLEEP, &gmu->status);
626 }
627
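/* The PDC register offsets used below are dword indices; pdc_write() converts them to byte offsets */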
628 static inline void pdc_write(void __iomem *ptr, u32 offset, u32 value)
629 {
630 writel(value, ptr + (offset << 2));
631 }
632
633 static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
634 {
635 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
636 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
637 struct platform_device *pdev = to_platform_device(gmu->dev);
638 u32 seqmem0_drv0_reg = REG_A6XX_RSCC_SEQ_MEM_0_DRV0;
639 void __iomem *seqptr = NULL;
640 uint32_t pdc_address_offset;
641 void __iomem *pdcptr;
642 bool pdc_in_aop = false;
643
644 /* On A8xx and above, the RPMh/PDC configuration is handled entirely by AOP */
645 if (adreno_is_a8xx(adreno_gpu))
646 return;
647
648 pdcptr = devm_platform_ioremap_resource_byname(pdev, "gmu_pdc");
649 if (IS_ERR(pdcptr))
650 return;
651
652 if (adreno_is_a650_family(adreno_gpu) ||
653 adreno_is_a7xx(adreno_gpu))
654 pdc_in_aop = true;
655 else if (adreno_is_a618(adreno_gpu) || adreno_is_a640_family(adreno_gpu))
656 pdc_address_offset = 0x30090;
657 else if (adreno_is_a619(adreno_gpu))
658 pdc_address_offset = 0x300a0;
659 else
660 pdc_address_offset = 0x30080;
661
662 if (!pdc_in_aop) {
663 seqptr = devm_platform_ioremap_resource_byname(pdev, "gmu_pdc_seq");
664 if (IS_ERR(seqptr))
665 return;
666 }
667
668 /* Disable SDE clock gating */
669 gmu_write_rscc(gmu, REG_A6XX_GPU_RSCC_RSC_STATUS0_DRV0, BIT(24));
670
671 /* Setup RSC PDC handshake for sleep and wakeup */
672 gmu_write_rscc(gmu, REG_A6XX_RSCC_PDC_SLAVE_ID_DRV0, 1);
673 gmu_write_rscc(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_DATA, 0);
674 gmu_write_rscc(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR, 0);
675 gmu_write_rscc(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_DATA + 2, 0);
676 gmu_write_rscc(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR + 2, 0);
677 gmu_write_rscc(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_DATA + 4,
678 adreno_is_a740_family(adreno_gpu) ? 0x80000021 : 0x80000000);
679 gmu_write_rscc(gmu, REG_A6XX_RSCC_HIDDEN_TCS_CMD0_ADDR + 4, 0);
680 gmu_write_rscc(gmu, REG_A6XX_RSCC_OVERRIDE_START_ADDR, 0);
681 gmu_write_rscc(gmu, REG_A6XX_RSCC_PDC_SEQ_START_ADDR, 0x4520);
682 gmu_write_rscc(gmu, REG_A6XX_RSCC_PDC_MATCH_VALUE_LO, 0x4510);
683 gmu_write_rscc(gmu, REG_A6XX_RSCC_PDC_MATCH_VALUE_HI, 0x4514);
684
685 /* The second spin of A7xx GPUs messed with some register offsets.. */
686 if (adreno_is_a740_family(adreno_gpu))
687 seqmem0_drv0_reg = REG_A7XX_RSCC_SEQ_MEM_0_DRV0_A740;
688
689 /* Load RSC sequencer uCode for sleep and wakeup */
690 if (adreno_is_a650_family(adreno_gpu) ||
691 adreno_is_a7xx(adreno_gpu)) {
692 gmu_write_rscc(gmu, seqmem0_drv0_reg, 0xeaaae5a0);
693 gmu_write_rscc(gmu, seqmem0_drv0_reg + 1, 0xe1a1ebab);
694 gmu_write_rscc(gmu, seqmem0_drv0_reg + 2, 0xa2e0a581);
695 gmu_write_rscc(gmu, seqmem0_drv0_reg + 3, 0xecac82e2);
696 gmu_write_rscc(gmu, seqmem0_drv0_reg + 4, 0x0020edad);
697 } else {
698 gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0, 0xa7a506a0);
699 gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 1, 0xa1e6a6e7);
700 gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 2, 0xa2e081e1);
701 gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 3, 0xe9a982e2);
702 gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 4, 0x0020e8a8);
703 }
704
705 if (pdc_in_aop)
706 goto setup_pdc;
707
708 /* Load PDC sequencer uCode for power up and power down sequence */
709 pdc_write(seqptr, REG_A6XX_PDC_GPU_SEQ_MEM_0, 0xfebea1e1);
710 pdc_write(seqptr, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 1, 0xa5a4a3a2);
711 pdc_write(seqptr, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 2, 0x8382a6e0);
712 pdc_write(seqptr, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 3, 0xbce3e284);
713 pdc_write(seqptr, REG_A6XX_PDC_GPU_SEQ_MEM_0 + 4, 0x002081fc);
714
715 /* Set TCS commands used by PDC sequence for low power modes */
716 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD_ENABLE_BANK, 7);
717 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD_WAIT_FOR_CMPL_BANK, 0);
718 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CONTROL, 0);
719 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID, 0x10108);
720 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR, 0x30010);
721 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA, 1);
722 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID + 4, 0x10108);
723 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 4, 0x30000);
724 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA + 4, 0x0);
725
726 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_MSGID + 8, 0x10108);
727 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_ADDR + 8, pdc_address_offset);
728 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS1_CMD0_DATA + 8, 0x0);
729
730 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD_ENABLE_BANK, 7);
731 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD_WAIT_FOR_CMPL_BANK, 0);
732 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CONTROL, 0);
733 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID, 0x10108);
734 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR, 0x30010);
735 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA, 2);
736
737 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 4, 0x10108);
738 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 4, 0x30000);
739 if (adreno_is_a618(adreno_gpu) || adreno_is_a619(adreno_gpu) ||
740 adreno_is_a650_family(adreno_gpu))
741 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x2);
742 else
743 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 4, 0x3);
744 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_MSGID + 8, 0x10108);
745 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_ADDR + 8, pdc_address_offset);
746 pdc_write(pdcptr, REG_A6XX_PDC_GPU_TCS3_CMD0_DATA + 8, 0x3);
747
748 /* Setup GPU PDC */
749 setup_pdc:
750 pdc_write(pdcptr, REG_A6XX_PDC_GPU_SEQ_START_ADDR, 0);
751 pdc_write(pdcptr, REG_A6XX_PDC_GPU_ENABLE_PDC, 0x80000001);
752
753 /* ensure no writes happen before the uCode is fully written */
754 wmb();
755 }
756
757 /*
758 * The lowest 16 bits of this value are the number of XO clock cycles for main
759 * hysteresis which is set at 0x1680 cycles (300 us). The higher 16 bits are
760 * for the shorter hysteresis that happens after main - this is 0xa (.5 us)
761 */
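/*
 * Sanity check on the numbers: at the 19.2 MHz XO, 0x1680 (5760) cycles
 * is 5760 / 19.2e6 = 300 us, and 0xa (10) cycles is roughly 0.5 us.
 */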
762
763 #define GMU_PWR_COL_HYST 0x000a1680
764
765 /* Set up the idle state for the GMU */
766 static void a6xx_gmu_power_config(struct a6xx_gmu *gmu)
767 {
768 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
769 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
770
771 /* Disable GMU WB/RB buffer */
772 gmu_write(gmu, REG_A6XX_GMU_SYS_BUS_CONFIG, 0x1);
773 gmu_write(gmu, REG_A6XX_GMU_ICACHE_CONFIG, 0x1);
774 gmu_write(gmu, REG_A6XX_GMU_DCACHE_CONFIG, 0x1);
775
776 /* A7xx knows better by default! */
777 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
778 return;
779
780 gmu_write(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0x9c40400);
781
782 switch (gmu->idle_level) {
783 case GMU_IDLE_STATE_IFPC:
784 gmu_write(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_HYST,
785 GMU_PWR_COL_HYST);
786 gmu_rmw(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0,
787 A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_IFPC_ENABLE |
788 A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_HM_POWER_COLLAPSE_ENABLE);
789 fallthrough;
790 case GMU_IDLE_STATE_SPTP:
791 gmu_write(gmu, REG_A6XX_GMU_PWR_COL_SPTPRAC_HYST,
792 GMU_PWR_COL_HYST);
793 gmu_rmw(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0,
794 A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_IFPC_ENABLE |
795 A6XX_GMU_PWR_COL_INTER_FRAME_CTRL_SPTPRAC_POWER_CONTROL_ENABLE);
796 }
797
798 /* Enable RPMh GPU client */
799 gmu_rmw(gmu, REG_A6XX_GMU_RPMH_CTRL, 0,
800 A6XX_GMU_RPMH_CTRL_RPMH_INTERFACE_ENABLE |
801 A6XX_GMU_RPMH_CTRL_LLC_VOTE_ENABLE |
802 A6XX_GMU_RPMH_CTRL_DDR_VOTE_ENABLE |
803 A6XX_GMU_RPMH_CTRL_MX_VOTE_ENABLE |
804 A6XX_GMU_RPMH_CTRL_CX_VOTE_ENABLE |
805 A6XX_GMU_RPMH_CTRL_GFX_VOTE_ENABLE);
806 }
807
808 struct block_header {
809 u32 addr;
810 u32 size;
811 u32 type;
812 u32 value;
813 u32 data[];
814 };
815
816 static bool fw_block_mem(struct a6xx_gmu_bo *bo, const struct block_header *blk)
817 {
818 if (!in_range(blk->addr, bo->iova, bo->size))
819 return false;
820
821 memcpy(bo->virt + blk->addr - bo->iova, blk->data, blk->size);
822 return true;
823 }
824
825 #define NEXT_BLK(blk) \
826 ((const struct block_header *)((const char *)(blk) + sizeof(*(blk)) + (blk)->size))
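/*
 * The non-legacy GMU firmware image is a sequence of block_header
 * structs, each followed by 'size' bytes of payload; NEXT_BLK() skips
 * over one header plus its payload to reach the next block.
 */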
827
828 static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu)
829 {
830 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
831 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
832 const struct firmware *fw_image = adreno_gpu->fw[ADRENO_FW_GMU];
833 const struct block_header *blk;
834 u32 reg_offset;
835 u32 ver;
836
837 u32 itcm_base = 0x00000000;
838 u32 dtcm_base = 0x00040000;
839
840 if (adreno_is_a650_family(adreno_gpu) ||
841 adreno_is_a7xx(adreno_gpu) ||
842 adreno_is_a8xx(adreno_gpu))
843 dtcm_base = 0x10004000;
844
845 if (gmu->legacy) {
846 /* Sanity check the size of the firmware that was loaded */
847 if (fw_image->size > 0x8000) {
848 DRM_DEV_ERROR(gmu->dev,
849 "GMU firmware is bigger than the available region\n");
850 return -EINVAL;
851 }
852
853 gmu_write_bulk(gmu, REG_A6XX_GMU_CM3_ITCM_START,
854 (u32*) fw_image->data, fw_image->size);
855 return 0;
856 }
857
858
859 for (blk = (const struct block_header *) fw_image->data;
860 (const u8*) blk < fw_image->data + fw_image->size;
861 blk = NEXT_BLK(blk)) {
862 if (blk->size == 0)
863 continue;
864
865 if (in_range(blk->addr, itcm_base, SZ_16K)) {
866 reg_offset = (blk->addr - itcm_base) >> 2;
867 gmu_write_bulk(gmu,
868 REG_A6XX_GMU_CM3_ITCM_START + reg_offset,
869 blk->data, blk->size);
870 } else if (in_range(blk->addr, dtcm_base, SZ_16K)) {
871 reg_offset = (blk->addr - dtcm_base) >> 2;
872 gmu_write_bulk(gmu,
873 REG_A6XX_GMU_CM3_DTCM_START + reg_offset,
874 blk->data, blk->size);
875 } else if (!fw_block_mem(&gmu->icache, blk) &&
876 !fw_block_mem(&gmu->dcache, blk) &&
877 !fw_block_mem(&gmu->dummy, blk)) {
878 DRM_DEV_ERROR(gmu->dev,
879 "failed to match fw block (addr=%.8x size=%d data[0]=%.8x)\n",
880 blk->addr, blk->size, blk->data[0]);
881 }
882 }
883
884 ver = gmu_read(gmu, REG_A6XX_GMU_CORE_FW_VERSION);
885 DRM_INFO_ONCE("Loaded GMU firmware v%u.%u.%u\n",
886 FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MAJOR__MASK, ver),
887 FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MINOR__MASK, ver),
888 FIELD_GET(A6XX_GMU_CORE_FW_VERSION_STEP__MASK, ver));
889
890 return 0;
891 }
892
893 static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
894 {
895 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
896 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
897 struct msm_gpu *gpu = &adreno_gpu->base;
898 const struct a6xx_info *a6xx_info = adreno_gpu->info->a6xx;
899 const struct adreno_reglist *gbif_cx = a6xx_info->gbif_cx;
900 u32 fence_range_lower, fence_range_upper;
901 u32 chipid = 0;
902 int ret;
903
904 /* Vote veto for FAL10 */
905 if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu)) {
906 gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 1);
907 gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_CX_FAL_INTF, 1);
908 } else if (adreno_is_a8xx(adreno_gpu)) {
909 gmu_write(gmu, REG_A8XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 1);
910 gmu_write(gmu, REG_A8XX_GPU_GMU_CX_GMU_CX_FAL_INTF, 1);
911 }
912
913 /* Turn on TCM (Tightly Coupled Memory) retention */
914 if (adreno_is_a7xx(adreno_gpu))
915 a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_TCM_RET_CNTL, 1);
916 else if (!adreno_is_a8xx(adreno_gpu))
917 gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1);
918
919 ret = a6xx_rpmh_start(gmu);
920 if (ret)
921 return ret;
922
923 if (state == GMU_COLD_BOOT) {
924 if (WARN(!adreno_gpu->fw[ADRENO_FW_GMU],
925 "GMU firmware is not loaded\n"))
926 return -ENOENT;
927
928 ret = a6xx_gmu_fw_load(gmu);
929 if (ret)
930 return ret;
931 }
932
933 /* Clear init result to make sure we are getting a fresh value */
934 gmu_write(gmu, REG_A6XX_GMU_CM3_FW_INIT_RESULT, 0);
935 gmu_write(gmu, REG_A6XX_GMU_CM3_BOOT_CONFIG, 0x02);
936
937 /* Write the iova of the HFI table */
938 gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_ADDR, gmu->hfi.iova);
939 gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_INFO, 1);
940
941 if (adreno_is_a8xx(adreno_gpu)) {
942 fence_range_upper = 0x32;
943 fence_range_lower = 0x8c0;
944 } else if (adreno_is_a7xx(adreno_gpu)) {
945 fence_range_upper = 0x32;
946 fence_range_lower = 0x8a0;
947 } else {
948 fence_range_upper = 0xa;
949 fence_range_lower = 0xa0;
950 }
951
952 gmu_write(gmu, REG_A6XX_GMU_AHB_FENCE_RANGE_0,
953 BIT(31) |
954 FIELD_PREP(GENMASK(30, 18), fence_range_upper) |
955 FIELD_PREP(GENMASK(17, 0), fence_range_lower));
956
957 /*
958 * Snapshots toggle the NMI bit which will result in a jump to the NMI
959 * handler instead of __main. Set the M3 config value to avoid that.
960 */
961 gmu_write(gmu, REG_A6XX_GMU_CM3_CFG, 0x4052);
962
963 if (a6xx_info->gmu_chipid) {
964 chipid = a6xx_info->gmu_chipid;
965 } else {
966 /*
967 * Note that the GMU has a slightly different layout for
968 * chip_id, for whatever reason, so a bit of massaging
969 * is needed. The upper 16b are the same, but minor and
970 * patchid are packed in four bits each with the lower
971 * 8b unused:
972 */
973 chipid = adreno_gpu->chip_id & 0xffff0000;
974 chipid |= (adreno_gpu->chip_id << 4) & 0xf000; /* minor */
975 chipid |= (adreno_gpu->chip_id << 8) & 0x0f00; /* patchid */
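/*
 * e.g. a chip_id of 0x06030001 (core 6, major 3, minor 0, patch 1)
 * is packed into a GMU chipid of 0x06030100.
 */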
976 }
977
978 if (adreno_is_a8xx(adreno_gpu)) {
979 gmu_write(gmu, REG_A8XX_GMU_GENERAL_10, chipid);
980 gmu_write(gmu, REG_A8XX_GMU_GENERAL_8,
981 (gmu->log.iova & GENMASK(31, 12)) |
982 ((gmu->log.size / SZ_4K - 1) & GENMASK(7, 0)));
983 } else if (adreno_is_a7xx(adreno_gpu)) {
984 gmu_write(gmu, REG_A7XX_GMU_GENERAL_10, chipid);
985 gmu_write(gmu, REG_A7XX_GMU_GENERAL_8,
986 (gmu->log.iova & GENMASK(31, 12)) |
987 ((gmu->log.size / SZ_4K - 1) & GENMASK(7, 0)));
988 } else {
989 gmu_write(gmu, REG_A6XX_GMU_HFI_SFR_ADDR, chipid);
990
991 gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_MSG,
992 gmu->log.iova | (gmu->log.size / SZ_4K - 1));
993 }
994
995 /* For A7xx and newer, do the CX GBIF configuration before GMU wake up */
996 for (int i = 0; (gbif_cx && gbif_cx[i].offset); i++)
997 gpu_write(gpu, gbif_cx[i].offset, gbif_cx[i].value);
998
999 if (adreno_is_a8xx(adreno_gpu)) {
1000 gpu_write(gpu, REG_A8XX_GBIF_CX_CONFIG, 0x20023000);
1001 gmu_write(gmu, REG_A6XX_GMU_MRC_GBIF_QOS_CTRL, 0x33);
1002 }
1003
1004 /* Set up the lowest idle level on the GMU */
1005 a6xx_gmu_power_config(gmu);
1006
1007 ret = a6xx_gmu_start(gmu);
1008 if (ret)
1009 return ret;
1010
1011 if (gmu->legacy) {
1012 ret = a6xx_gmu_gfx_rail_on(gmu);
1013 if (ret)
1014 return ret;
1015
1016 ret = a6xx_sptprac_enable(gmu);
1017 if (ret)
1018 return ret;
1019 }
1020
1021 ret = a6xx_gmu_hfi_start(gmu);
1022 if (ret)
1023 return ret;
1024
1025 /* FIXME: Do we need this wmb() here? */
1026 wmb();
1027
1028 return 0;
1029 }
1030
1031 #define A6XX_HFI_IRQ_MASK \
1032 (A6XX_GMU_GMU2HOST_INTR_INFO_CM3_FAULT)
1033
1034 #define A6XX_GMU_IRQ_MASK \
1035 (A6XX_GMU_AO_HOST_INTERRUPT_STATUS_WDOG_BITE | \
1036 A6XX_GMU_AO_HOST_INTERRUPT_STATUS_HOST_AHB_BUS_ERROR | \
1037 A6XX_GMU_AO_HOST_INTERRUPT_STATUS_FENCE_ERR)
1038
1039 static void a6xx_gmu_irq_disable(struct a6xx_gmu *gmu)
1040 {
1041 disable_irq(gmu->gmu_irq);
1042 disable_irq(gmu->hfi_irq);
1043
1044 gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK, ~0);
1045 gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~0);
1046 }
1047
1048 static void a6xx_gmu_rpmh_off(struct a6xx_gmu *gmu)
1049 {
1050 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
1051 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1052 u32 val, seqmem_off = 0;
1053
1054 /* The second spin of A7xx GPUs messed with some register offsets.. */
1055 if (adreno_is_a740_family(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
1056 seqmem_off = 4;
1057
1058 /* Make sure there are no outstanding RPMh votes */
1059 gmu_poll_timeout_rscc(gmu, REG_A6XX_RSCC_TCS0_DRV0_STATUS + seqmem_off,
1060 val, (val & 1), 100, 10000);
1061 gmu_poll_timeout_rscc(gmu, REG_A6XX_RSCC_TCS1_DRV0_STATUS + seqmem_off,
1062 val, (val & 1), 100, 10000);
1063 gmu_poll_timeout_rscc(gmu, REG_A6XX_RSCC_TCS2_DRV0_STATUS + seqmem_off,
1064 val, (val & 1), 100, 10000);
1065 gmu_poll_timeout_rscc(gmu, REG_A6XX_RSCC_TCS3_DRV0_STATUS + seqmem_off,
1066 val, (val & 1), 100, 1000);
1067
1068 if (!adreno_is_a740_family(adreno_gpu) && !adreno_is_a8xx(adreno_gpu))
1069 return;
1070
1071 gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS4_DRV0_STATUS + seqmem_off,
1072 val, (val & 1), 100, 10000);
1073 gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS5_DRV0_STATUS + seqmem_off,
1074 val, (val & 1), 100, 10000);
1075 gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS6_DRV0_STATUS + seqmem_off,
1076 val, (val & 1), 100, 10000);
1077 gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS7_DRV0_STATUS + seqmem_off,
1078 val, (val & 1), 100, 1000);
1079 gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS8_DRV0_STATUS + seqmem_off,
1080 val, (val & 1), 100, 10000);
1081 gmu_poll_timeout_rscc(gmu, REG_A7XX_RSCC_TCS9_DRV0_STATUS + seqmem_off,
1082 val, (val & 1), 100, 1000);
1083 }
1084
1085 /* Force the GMU off in case it isn't responsive */
1086 static void a6xx_gmu_force_off(struct a6xx_gmu *gmu)
1087 {
1088 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
1089 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1090 struct msm_gpu *gpu = &adreno_gpu->base;
1091
1092 /*
1093 * Turn off keep alive that might have been enabled by the hang
1094 * interrupt
1095 */
1096 if (adreno_is_a8xx(adreno_gpu))
1097 gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_GMU_PWR_COL_KEEPALIVE, 0);
1098 else
1099 gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 0);
1100
1101 /* Flush all the queues */
1102 a6xx_hfi_stop(gmu);
1103
1104 /* Stop the interrupts */
1105 a6xx_gmu_irq_disable(gmu);
1106
1107 /* Force off SPTP in case the GMU is managing it */
1108 a6xx_sptprac_disable(gmu);
1109
1110 a6xx_gemnoc_workaround(gmu);
1111
1112 /* Make sure there are no outstanding RPMh votes */
1113 a6xx_gmu_rpmh_off(gmu);
1114
1115 /* Clear the WRITEDROPPED fields and put fence into allow mode */
1116 gmu_write(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS_CLR, 0x7);
1117 gmu_write(gmu, REG_A6XX_GMU_AO_AHB_FENCE_CTRL, 0);
1118
1119 /* Make sure the above writes go through */
1120 wmb();
1121
1122 /* Halt the gmu cm3 core */
1123 gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 1);
1124
1125 adreno_gpu->funcs->bus_halt(adreno_gpu, true);
1126
1127 /* Reset GPU core blocks */
1128 a6xx_gpu_sw_reset(gpu, true);
1129
1130 a6xx_rpmh_stop(gmu);
1131 }
1132
1133 static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
1134 {
1135 struct dev_pm_opp *gpu_opp;
1136 unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index];
1137
1138 gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true);
1139 if (IS_ERR(gpu_opp))
1140 return;
1141
1142 gmu->freq = 0; /* so a6xx_gmu_set_freq() doesn't exit early */
1143 a6xx_gmu_set_freq(gpu, gpu_opp, false);
1144 dev_pm_opp_put(gpu_opp);
1145 }
1146
1147 static void a6xx_gmu_set_initial_bw(struct msm_gpu *gpu, struct a6xx_gmu *gmu)
1148 {
1149 struct dev_pm_opp *gpu_opp;
1150 unsigned long gpu_freq = gmu->gpu_freqs[gmu->current_perf_index];
1151
1152 gpu_opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, gpu_freq, true);
1153 if (IS_ERR(gpu_opp))
1154 return;
1155
1156 dev_pm_opp_set_opp(&gpu->pdev->dev, gpu_opp);
1157 dev_pm_opp_put(gpu_opp);
1158 }
1159
1160 int a6xx_gmu_resume(struct a6xx_gpu *a6xx_gpu)
1161 {
1162 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1163 struct msm_gpu *gpu = &adreno_gpu->base;
1164 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1165 int status, ret;
1166
1167 if (WARN(!gmu->initialized, "The GMU is not set up yet\n"))
1168 return -EINVAL;
1169
1170 gmu->hung = false;
1171
1172 /* Turn on the resources */
1173 pm_runtime_get_sync(gmu->dev);
1174
1175 /*
1176 * "enable" the GX power domain which won't actually do anything but it
1177 * will make sure that the refcounting is correct in case we need to
1178 * bring down the GX after a GMU failure
1179 */
1180 if (!IS_ERR_OR_NULL(gmu->gxpd))
1181 pm_runtime_get_sync(gmu->gxpd);
1182
1183 /* Use a known rate to bring up the GMU */
1184 clk_set_rate(gmu->core_clk, 200000000);
1185 clk_set_rate(gmu->hub_clk, adreno_is_a740_family(adreno_gpu) ?
1186 200000000 : 150000000);
1187 ret = clk_bulk_prepare_enable(gmu->nr_clocks, gmu->clocks);
1188 if (ret) {
1189 pm_runtime_put(gmu->gxpd);
1190 pm_runtime_put(gmu->dev);
1191 return ret;
1192 }
1193
1194 /* Read the slice info on A8xx GPUs */
1195 a8xx_gpu_get_slice_info(gpu);
1196
1197 /* Set the bus quota to a reasonable value for boot */
1198 a6xx_gmu_set_initial_bw(gpu, gmu);
1199
1200 /* Enable the GMU interrupt */
1201 gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_CLR, ~0);
1202 gmu_write(gmu, REG_A6XX_GMU_AO_HOST_INTERRUPT_MASK, ~A6XX_GMU_IRQ_MASK);
1203 enable_irq(gmu->gmu_irq);
1204
1205 /* Check to see if we are doing a cold or warm boot */
1206 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu)) {
1207 status = a6xx_llc_read(a6xx_gpu, REG_A7XX_CX_MISC_TCM_RET_CNTL) == 1 ?
1208 GMU_WARM_BOOT : GMU_COLD_BOOT;
1209 } else if (gmu->legacy) {
1210 status = gmu_read(gmu, REG_A6XX_GMU_GENERAL_7) == 1 ?
1211 GMU_WARM_BOOT : GMU_COLD_BOOT;
1212 } else {
1213 /*
1214 * Warm boot path does not work on newer A6xx GPUs
1215 * Presumably this is because icache/dcache regions must be restored
1216 */
1217 status = GMU_COLD_BOOT;
1218 }
1219
1220 ret = a6xx_gmu_fw_start(gmu, status);
1221 if (ret)
1222 goto out;
1223
1224 ret = a6xx_hfi_start(gmu, status);
1225 if (ret)
1226 goto out;
1227
1228 /*
1229 * Turn on the GMU firmware fault interrupt after we know the boot
1230 * sequence is successful
1231 */
1232 gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_CLR, ~0);
1233 gmu_write(gmu, REG_A6XX_GMU_GMU2HOST_INTR_MASK, ~A6XX_HFI_IRQ_MASK);
1234 enable_irq(gmu->hfi_irq);
1235
1236 /* Set the GPU to the current freq */
1237 a6xx_gmu_set_initial_freq(gpu, gmu);
1238
1239 if (refcount_read(&gpu->sysprof_active) > 1) {
1240 ret = a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
1241 if (!ret)
1242 set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status);
1243 }
1244 out:
1245 /* On failure, shut down the GMU to leave it in a good state */
1246 if (ret) {
1247 disable_irq(gmu->gmu_irq);
1248 a6xx_rpmh_stop(gmu);
1249 pm_runtime_put(gmu->gxpd);
1250 pm_runtime_put(gmu->dev);
1251 }
1252
1253 return ret;
1254 }
1255
1256 bool a6xx_gmu_isidle(struct a6xx_gmu *gmu)
1257 {
1258 u32 reg;
1259
1260 if (!gmu->initialized)
1261 return true;
1262
1263 reg = gmu_read(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS);
1264
1265 if (reg & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB)
1266 return false;
1267
1268 return true;
1269 }
1270
1271 /* Gracefully try to shut down the GMU and by extension the GPU */
1272 static void a6xx_gmu_shutdown(struct a6xx_gmu *gmu)
1273 {
1274 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
1275 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1276 u32 val;
1277 int ret;
1278
1279 /*
1280 * GMU firmware's internal power state gets messed up if we send "prepare_slumber" hfi when
1281 * oob_gpu handshake wasn't done after the last wake up. So do a dummy handshake here when
1282 * required
1283 */
1284 if (adreno_gpu->base.needs_hw_init) {
1285 if (a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET))
1286 goto force_off;
1287
1288 a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
1289 }
1290
1291 if (test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status))
1292 a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
1293
1294 ret = a6xx_gmu_wait_for_idle(gmu);
1295
1296 /* If the GMU isn't responding assume it is hung */
1297 if (ret)
1298 goto force_off;
1299
1300 adreno_gpu->funcs->bus_halt(adreno_gpu, a6xx_gpu->hung);
1301
1302 /* tell the GMU we want to slumber */
1303 ret = a6xx_gmu_notify_slumber(gmu);
1304 if (ret)
1305 goto force_off;
1306
1307 ret = gmu_poll_timeout(gmu,
1308 REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS, val,
1309 !(val & A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS_GPUBUSYIGNAHB),
1310 100, 10000);
1311
1312 /*
1313 * Let the user know we failed to slumber but don't worry too
1314 * much because we are powering down anyway
1315 */
1316
1317 if (ret)
1318 DRM_DEV_ERROR(gmu->dev,
1319 "Unable to slumber GMU: status = 0%x/0%x\n",
1320 gmu_read(gmu,
1321 REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS),
1322 gmu_read(gmu,
1323 REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_STATUS2));
1324
1325 /* Turn off HFI */
1326 a6xx_hfi_stop(gmu);
1327
1328 /* Stop the interrupts and mask the hardware */
1329 a6xx_gmu_irq_disable(gmu);
1330
1331 /* Tell RPMh to power off the GPU */
1332 a6xx_rpmh_stop(gmu);
1333
1334 return;
1335
1336 force_off:
1337 a6xx_gmu_force_off(gmu);
1338 }
1339
1340
1341 int a6xx_gmu_stop(struct a6xx_gpu *a6xx_gpu)
1342 {
1343 struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
1344 struct msm_gpu *gpu = &a6xx_gpu->base.base;
1345
1346 if (!pm_runtime_active(gmu->dev))
1347 return 0;
1348
1349 /*
1350 * Force the GMU off if we detected a hang, otherwise try to shut it
1351 * down gracefully
1352 */
1353 if (gmu->hung)
1354 a6xx_gmu_force_off(gmu);
1355 else
1356 a6xx_gmu_shutdown(gmu);
1357
1358 /* Remove the bus vote */
1359 dev_pm_opp_set_opp(&gpu->pdev->dev, NULL);
1360
1361 /*
1362 * Make sure the GX domain is off before turning off the GMU (CX)
1363 * domain. Usually the GMU does this but only if the shutdown sequence
1364 * was successful
1365 */
1366 if (!IS_ERR_OR_NULL(gmu->gxpd))
1367 pm_runtime_put_sync(gmu->gxpd);
1368
1369 clk_bulk_disable_unprepare(gmu->nr_clocks, gmu->clocks);
1370
1371 pm_runtime_put_sync(gmu->dev);
1372
1373 return 0;
1374 }
1375
1376 static void a6xx_gmu_memory_free(struct a6xx_gmu *gmu)
1377 {
1378 struct msm_mmu *mmu = to_msm_vm(gmu->vm)->mmu;
1379
1380 msm_gem_kernel_put(gmu->hfi.obj, gmu->vm);
1381 msm_gem_kernel_put(gmu->debug.obj, gmu->vm);
1382 msm_gem_kernel_put(gmu->icache.obj, gmu->vm);
1383 msm_gem_kernel_put(gmu->dcache.obj, gmu->vm);
1384 msm_gem_kernel_put(gmu->dummy.obj, gmu->vm);
1385 msm_gem_kernel_put(gmu->log.obj, gmu->vm);
1386
1387 mmu->funcs->detach(mmu);
1388 drm_gpuvm_put(gmu->vm);
1389 }
1390
1391 static int a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo,
1392 size_t size, u64 iova, const char *name)
1393 {
1394 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
1395 struct drm_device *dev = a6xx_gpu->base.base.dev;
1396 uint32_t flags = MSM_BO_WC;
1397 u64 range_start, range_end;
1398 int ret;
1399
1400 size = PAGE_ALIGN(size);
1401 if (!iova) {
1402 /* no fixed address - use GMU's uncached range */
1403 range_start = 0x60000000 + PAGE_SIZE; /* skip dummy page */
1404 range_end = 0x80000000;
1405 } else {
1406 /* range for fixed address */
1407 range_start = iova;
1408 range_end = iova + size;
1409 /* use IOMMU_PRIV for icache/dcache */
1410 flags |= MSM_BO_MAP_PRIV;
1411 }
1412
1413 bo->obj = msm_gem_new(dev, size, flags);
1414 if (IS_ERR(bo->obj))
1415 return PTR_ERR(bo->obj);
1416
1417 ret = msm_gem_get_and_pin_iova_range(bo->obj, gmu->vm, &bo->iova,
1418 range_start, range_end);
1419 if (ret) {
1420 drm_gem_object_put(bo->obj);
1421 return ret;
1422 }
1423
1424 bo->virt = msm_gem_get_vaddr(bo->obj);
1425 bo->size = size;
1426
1427 msm_gem_object_set_name(bo->obj, "%s", name);
1428
1429 return 0;
1430 }
1431
1432 static int a6xx_gmu_memory_probe(struct drm_device *drm, struct a6xx_gmu *gmu)
1433 {
1434 struct msm_mmu *mmu;
1435
1436 mmu = msm_iommu_new(gmu->dev, 0);
1437 if (IS_ERR(mmu))
1438 return PTR_ERR(mmu);
1439
1440 gmu->vm = msm_gem_vm_create(drm, mmu, "gmu", 0x0, 0x80000000, true);
1441 if (IS_ERR(gmu->vm))
1442 return PTR_ERR(gmu->vm);
1443
1444 return 0;
1445 }
1446
1447 /**
1448 * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager (BCM)
1449 * @unit: divisor used to convert bytes/sec bw value to an RPMh msg
1450 * @width: multiplier used to convert bytes/sec bw value to an RPMh msg
1451 * @vcd: virtual clock domain that this bcm belongs to
1452 * @reserved: reserved field
1453 */
1454 struct bcm_db {
1455 __le32 unit;
1456 __le16 width;
1457 u8 vcd;
1458 u8 reserved;
1459 };
1460
1461 static int a6xx_gmu_rpmh_bw_votes_init(struct adreno_gpu *adreno_gpu,
1462 const struct a6xx_info *info,
1463 struct a6xx_gmu *gmu)
1464 {
1465 const struct bcm_db *bcm_data[GMU_MAX_BCMS] = { 0 };
1466 unsigned int bcm_index, bw_index, bcm_count = 0;
1467
1468 /* Retrieve BCM data from cmd-db */
1469 for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) {
1470 const struct a6xx_bcm *bcm = &info->bcms[bcm_index];
1471 size_t count;
1472
1473 /* Stop at NULL terminated bcm entry */
1474 if (!bcm->name)
1475 break;
1476
1477 bcm_data[bcm_index] = cmd_db_read_aux_data(bcm->name, &count);
1478 if (IS_ERR(bcm_data[bcm_index]))
1479 return PTR_ERR(bcm_data[bcm_index]);
1480
1481 if (!count) {
1482 dev_err(gmu->dev, "invalid BCM '%s' aux data size\n",
1483 bcm->name);
1484 return -EINVAL;
1485 }
1486
1487 bcm_count++;
1488 }
1489
1490 /* Generate BCM votes values for each bandwidth & BCM */
1491 for (bw_index = 0; bw_index < gmu->nr_gpu_bws; bw_index++) {
1492 u32 *data = gmu->gpu_ib_votes[bw_index];
1493 u32 bw = gmu->gpu_bw_table[bw_index];
1494
1495 /* Calculations loosely copied from bcm_aggregate() & tcs_cmd_gen() */
1496 for (bcm_index = 0; bcm_index < bcm_count; bcm_index++) {
1497 const struct a6xx_bcm *bcm = &info->bcms[bcm_index];
1498 bool commit = false;
1499 u64 peak;
1500 u32 vote;
1501
1502 if (bcm_index == bcm_count - 1 ||
1503 (bcm_data[bcm_index + 1] &&
1504 bcm_data[bcm_index]->vcd != bcm_data[bcm_index + 1]->vcd))
1505 commit = true;
1506
1507 if (!bw) {
1508 data[bcm_index] = BCM_TCS_CMD(commit, false, 0, 0);
1509 continue;
1510 }
1511
1512 if (bcm->fixed) {
1513 u32 perfmode = 0;
1514
1515 /* The GMU on A6xx votes for perfmode at any valid bandwidth */
1516 if (!adreno_is_a7xx(adreno_gpu) ||
1517 (bcm->perfmode_bw && bw >= bcm->perfmode_bw))
1518 perfmode = bcm->perfmode;
1519
1520 data[bcm_index] = BCM_TCS_CMD(commit, true, 0, perfmode);
1521 continue;
1522 }
1523
1524 /* Multiply the bandwidth by the width of the connection */
1525 peak = (u64)bw * le16_to_cpu(bcm_data[bcm_index]->width);
1526 do_div(peak, bcm->buswidth);
1527
1528 /* Input bandwidth value is in KBps, scale the value to BCM unit */
1529 peak *= 1000;
1530 do_div(peak, le32_to_cpu(bcm_data[bcm_index]->unit));
1531
1532 vote = clamp(peak, 1, BCM_TCS_CMD_VOTE_MASK);
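/*
 * Rough worked example with hypothetical BCM parameters (width = 4,
 * buswidth = 4, unit = 1,000,000): bw = 8,000,000 KBps yields
 * peak = 8,000,000 * 4 / 4, then * 1000 / 1,000,000 = 8000, which is
 * well within the BCM_TCS_CMD_VOTE_MASK clamp.
 */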
1533
1534 /* The GMU on A7xx and newer votes on both x & y */
1535 if (adreno_is_a7xx(adreno_gpu) || adreno_is_a8xx(adreno_gpu))
1536 data[bcm_index] = BCM_TCS_CMD(commit, true, vote, vote);
1537 else
1538 data[bcm_index] = BCM_TCS_CMD(commit, true, 0, vote);
1539 }
1540 }
1541
1542 return 0;
1543 }
1544
1545 /* Return the 'arc-level' for the given frequency */
1546 static unsigned int a6xx_gmu_get_arc_level(struct device *dev,
1547 unsigned long freq)
1548 {
1549 struct dev_pm_opp *opp;
1550 unsigned int val;
1551
1552 if (!freq)
1553 return 0;
1554
1555 opp = dev_pm_opp_find_freq_exact(dev, freq, true);
1556 if (IS_ERR(opp))
1557 return 0;
1558
1559 val = dev_pm_opp_get_level(opp);
1560
1561 dev_pm_opp_put(opp);
1562
1563 return val;
1564 }
1565
1566 static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes,
1567 unsigned long *freqs, int freqs_count,
1568 const char *pri_id, const char *sec_id)
1569 {
1570 int i, j;
1571 const u16 *pri, *sec;
1572 size_t pri_count, sec_count;
1573
1574 pri = cmd_db_read_aux_data(pri_id, &pri_count);
1575 if (IS_ERR(pri))
1576 return PTR_ERR(pri);
1577 /*
1578 * The data comes back as an array of unsigned shorts so adjust the
1579 * count accordingly
1580 */
1581 pri_count >>= 1;
1582 if (!pri_count)
1583 return -EINVAL;
1584
1585 sec = cmd_db_read_aux_data(sec_id, &sec_count);
1586 if (IS_ERR(sec))
1587 return PTR_ERR(sec);
1588
1589 sec_count >>= 1;
1590 if (!sec_count)
1591 return -EINVAL;
1592
1593 /* Construct a vote for each frequency */
1594 for (i = 0; i < freqs_count; i++) {
1595 u8 pindex = 0, sindex = 0;
1596 unsigned int level = a6xx_gmu_get_arc_level(dev, freqs[i]);
1597
1598 /* Get the primary index that matches the arc level */
1599 for (j = 0; j < pri_count; j++) {
1600 if (pri[j] >= level) {
1601 pindex = j;
1602 break;
1603 }
1604 }
1605
1606 if (j == pri_count) {
1607 DRM_DEV_ERROR(dev,
1608 "Level %u not found in the RPMh list\n",
1609 level);
1610 DRM_DEV_ERROR(dev, "Available levels:\n");
1611 for (j = 0; j < pri_count; j++)
1612 DRM_DEV_ERROR(dev, " %u\n", pri[j]);
1613
1614 return -EINVAL;
1615 }
1616
1617 /*
1618 * Look for a level in the secondary list that matches. If
1619 * nothing fits, use the maximum non zero vote
1620 */
1621
1622 for (j = 0; j < sec_count; j++) {
1623 if (sec[j] >= level) {
1624 sindex = j;
1625 break;
1626 } else if (sec[j]) {
1627 sindex = j;
1628 }
1629 }
1630
1631 /* Construct the vote */
1632 votes[i] = ((pri[pindex] & 0xffff) << 16) |
1633 (sindex << 8) | pindex;
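/*
 * Resulting layout: bits [7:0] hold the primary rail index, [15:8] the
 * secondary rail index and [31:16] the primary level value. For the GX
 * votes, a6xx_gmu_gfx_rail_on() splits the low two bytes between
 * GX_VOTE_IDX and MX_VOTE_IDX.
 */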
1634 }
1635
1636 return 0;
1637 }
1638
1639 static int a6xx_gmu_rpmh_dep_votes_init(struct device *dev, u32 *votes,
1640 unsigned long *freqs, int freqs_count)
1641 {
1642 const u16 *mx;
1643 size_t count;
1644
1645 mx = cmd_db_read_aux_data("mx.lvl", &count);
1646 if (IS_ERR(mx))
1647 return PTR_ERR(mx);
1648 /*
1649 * The data comes back as an array of unsigned shorts so adjust the
1650 * count accordingly
1651 */
1652 count >>= 1;
1653 if (!count)
1654 return -EINVAL;
1655
1656 /* Fix the vote for zero frequency */
1657 votes[0] = 0xffffffff;
1658
1659 /* Construct a vote for rest of the corners */
1660 for (int i = 1; i < freqs_count; i++) {
1661 unsigned int level = a6xx_gmu_get_arc_level(dev, freqs[i]);
1662 u8 j, index = 0;
1663
1664 /* Get the primary index that matches the arc level */
1665 for (j = 0; j < count; j++) {
1666 if (mx[j] >= level) {
1667 index = j;
1668 break;
1669 }
1670 }
1671
1672 if (j == count) {
1673 DRM_DEV_ERROR(dev,
1674 "Mx Level %u not found in the RPMh list\n",
1675 level);
1676 DRM_DEV_ERROR(dev, "Available levels:\n");
1677 for (j = 0; j < count; j++)
1678 DRM_DEV_ERROR(dev, " %u\n", mx[j]);
1679
1680 return -EINVAL;
1681 }
1682
1683 /* Construct the vote */
1684 votes[i] = (0x3fff << 14) | (index << 8) | (0xff);
1685 }
1686
1687 return 0;
1688 }
1689
1690 /*
1691 * The GMU votes with RPMh for itself and on behalf of the GPU, but we need
1692 * to construct the list of votes on the CPU and send it over. Query the RPMh
1693 * voltage levels and build the votes.
1694 * The GMU can also vote for DDR interconnects; use the OPP bandwidth entries
1695 * and BCM parameters to build those votes.
1696 */
1697
1698 static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
1699 {
1700 struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
1701 struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
1702 const struct a6xx_info *info = adreno_gpu->info->a6xx;
1703 struct msm_gpu *gpu = &adreno_gpu->base;
1704 const char *sec_id;
1705 const u16 *gmxc;
1706 int ret;
1707
1708 gmxc = cmd_db_read_aux_data("gmxc.lvl", NULL);
1709 if (gmxc == ERR_PTR(-EPROBE_DEFER))
1710 return -EPROBE_DEFER;
1711
1712 /* If GMxC is present, prefer that as secondary rail for GX votes */
1713 sec_id = IS_ERR_OR_NULL(gmxc) ? "mx.lvl" : "gmxc.lvl";
1714
1715 /* Build the GX votes */
1716 ret = a6xx_gmu_rpmh_arc_votes_init(&gpu->pdev->dev, gmu->gx_arc_votes,
1717 gmu->gpu_freqs, gmu->nr_gpu_freqs, "gfx.lvl", sec_id);
1718
1719 /* Build the CX votes */
1720 ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes,
1721 gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl", "mx.lvl");
1722
1723 ret |= a6xx_gmu_rpmh_dep_votes_init(gmu->dev, gmu->dep_arc_votes,
1724 gmu->gpu_freqs, gmu->nr_gpu_freqs);
1725
1726 /* Build the interconnect votes */
1727 if (info->bcms && gmu->nr_gpu_bws > 1)
1728 ret |= a6xx_gmu_rpmh_bw_votes_init(adreno_gpu, info, gmu);
1729
1730 return ret;
1731 }
1732
1733 static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs,
1734 u32 size)
1735 {
1736 int count = dev_pm_opp_get_opp_count(dev);
1737 struct dev_pm_opp *opp;
1738 int i, index = 0;
1739 unsigned long freq = 1;
1740
1741 /*
1742 * The OPP table doesn't contain the "off" frequency level so we need to
1743 * add 1 to the table size to account for it
1744 */
1745
1746 if (WARN(count + 1 > size,
1747 "The GMU frequency table is being truncated\n"))
1748 count = size - 1;
1749
1750 /* Set the "off" frequency */
1751 freqs[index++] = 0;
1752
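/*
 * dev_pm_opp_find_freq_ceil() rounds 'freq' up to the next available OPP
 * and writes that rate back; incrementing it afterwards steps past the
 * OPP just recorded so the next iteration finds the following one.
 */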
1753 for (i = 0; i < count; i++) {
1754 opp = dev_pm_opp_find_freq_ceil(dev, &freq);
1755 if (IS_ERR(opp))
1756 break;
1757
1758 dev_pm_opp_put(opp);
1759 freqs[index++] = freq++;
1760 }
1761
1762 return index;
1763 }
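
/*
 * Illustrative only (hypothetical helper, not used by this driver): the table
 * built above is ascending with index 0 reserved for the "off" level, so a
 * consumer can map a requested rate to a perf index with a simple scan.
 */
#if 0
static int example_freq_to_perf_index(const unsigned long *freqs, int nr_freqs,
				      unsigned long req)
{
	int i;

	for (i = 0; i < nr_freqs; i++)
		if (freqs[i] >= req)
			return i;

	/* Request exceeds every entry: clamp to the fastest level */
	return nr_freqs - 1;
}
#endif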

static int a6xx_gmu_build_bw_table(struct device *dev, unsigned long *bandwidths,
				   u32 size)
{
	int count = dev_pm_opp_get_opp_count(dev);
	struct dev_pm_opp *opp;
	int i, index = 0;
	unsigned int bandwidth = 1;

	/*
	 * The OPP table doesn't contain the "off" bandwidth level so we need to
	 * add 1 to the table size to account for it
	 */

	if (WARN(count + 1 > size,
		 "The GMU bandwidth table is being truncated\n"))
		count = size - 1;

	/* Set the "off" bandwidth */
	bandwidths[index++] = 0;

	for (i = 0; i < count; i++) {
		opp = dev_pm_opp_find_bw_ceil(dev, &bandwidth, 0);
		if (IS_ERR(opp))
			break;

		dev_pm_opp_put(opp);
		bandwidths[index++] = bandwidth++;
	}

	return index;
}

static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
{
	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	struct msm_gpu *gpu = &adreno_gpu->base;
	int ret = 0;

	/*
	 * The GMU handles its own frequency switching so build a list of
	 * available frequencies to send during initialization
	 */
	ret = devm_pm_opp_of_add_table(gmu->dev);
	if (ret) {
		DRM_DEV_ERROR(gmu->dev, "Unable to set the OPP table for the GMU\n");
		return ret;
	}

	gmu->nr_gmu_freqs = a6xx_gmu_build_freq_table(gmu->dev,
						      gmu->gmu_freqs, ARRAY_SIZE(gmu->gmu_freqs));

	/*
	 * The GMU also handles GPU frequency switching so build a list
	 * from the GPU OPP table
	 */
	gmu->nr_gpu_freqs = a6xx_gmu_build_freq_table(&gpu->pdev->dev,
						      gmu->gpu_freqs, ARRAY_SIZE(gmu->gpu_freqs));

	gmu->current_perf_index = gmu->nr_gpu_freqs - 1;

	/*
	 * The GMU also handles GPU interconnect votes so build a list
	 * of DDR bandwidths from the GPU OPP table
	 */
	if (info->bcms)
		gmu->nr_gpu_bws = a6xx_gmu_build_bw_table(&gpu->pdev->dev,
							  gmu->gpu_bw_table, ARRAY_SIZE(gmu->gpu_bw_table));

	/* Build the list of RPMh votes that we'll send to the GMU */
	return a6xx_gmu_rpmh_votes_init(gmu);
}

static int a6xx_gmu_acd_probe(struct a6xx_gmu *gmu)
{
	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
	struct a6xx_hfi_acd_table *cmd = &gmu->acd_table;
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	int ret, i, cmd_idx = 0;
	extern bool disable_acd;

	/* Skip the ACD probe if requested via module param */
	if (disable_acd) {
		DRM_DEV_ERROR(gmu->dev, "Skipping GPU ACD probe\n");
		return 0;
	}

	cmd->version = 1;
	cmd->stride = 1;
	cmd->enable_by_level = 0;

	/* Skip freq = 0 and parse the acd-level for the rest of the OPPs */
	for (i = 1; i < gmu->nr_gpu_freqs; i++) {
		struct dev_pm_opp *opp;
		struct device_node *np;
		unsigned long freq;
		u32 val;

		freq = gmu->gpu_freqs[i];
		/* This is unlikely to fail because we are passing back a known freq */
		opp = dev_pm_opp_find_freq_exact(&gpu->pdev->dev, freq, true);
		np = dev_pm_opp_get_of_node(opp);

		ret = of_property_read_u32(np, "qcom,opp-acd-level", &val);
		of_node_put(np);
		dev_pm_opp_put(opp);
		if (ret == -EINVAL)
			continue;
		else if (ret) {
			DRM_DEV_ERROR(gmu->dev, "Unable to read acd level for freq %lu\n", freq);
			return ret;
		}

		cmd->enable_by_level |= BIT(i);
		cmd->data[cmd_idx++] = val;
	}

	cmd->num_levels = cmd_idx;

	/* It is a problem if the qmp node is unavailable when ACD is required */
	if (cmd->enable_by_level && IS_ERR_OR_NULL(gmu->qmp)) {
		DRM_DEV_ERROR(gmu->dev, "Unable to send ACD state to AOSS\n");
		return -EINVAL;
	}

	/* Otherwise, there is nothing to do if qmp is unavailable */
	if (IS_ERR_OR_NULL(gmu->qmp))
		return 0;

	/*
	 * Notify AOSS about the ACD state. AOSS is supposed to assume that ACD
	 * is disabled on system reset, so it is harmless if we couldn't notify
	 * the 'OFF' state.
	 */
	ret = qmp_send(gmu->qmp, "{class: gpu, res: acd, val: %d}", !!cmd->enable_by_level);
	if (ret && cmd->enable_by_level) {
		DRM_DEV_ERROR(gmu->dev, "Failed to send ACD state to AOSS\n");
		return ret;
	}

	return 0;
}
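
/*
 * For reference, the "qcom,opp-acd-level" value parsed above comes from the
 * GPU OPP table in the device tree. A hypothetical entry (values purely
 * illustrative, not taken from any real platform) could look like:
 *
 *	opp-500000000 {
 *		opp-hz = /bits/ 64 <500000000>;
 *		opp-level = <RPMH_REGULATOR_LEVEL_SVS>;
 *		qcom,opp-acd-level = <0xc0285ffd>;
 *	};
 */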

static int a6xx_gmu_clocks_probe(struct a6xx_gmu *gmu)
{
	int ret = devm_clk_bulk_get_all(gmu->dev, &gmu->clocks);

	if (ret < 1)
		return ret;

	gmu->nr_clocks = ret;

	gmu->core_clk = msm_clk_bulk_get_clock(gmu->clocks,
					       gmu->nr_clocks, "gmu");

	gmu->hub_clk = msm_clk_bulk_get_clock(gmu->clocks,
					      gmu->nr_clocks, "hub");

	return 0;
}
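
/*
 * Illustrative only: msm_clk_bulk_get_clock() picks one named clock out of
 * the bulk-managed array so later code can drive it individually. A minimal,
 * hypothetical equivalent of that lookup (for clarity, not the actual msm
 * helper) would be:
 */
#if 0
static struct clk *example_find_named_clk(struct clk_bulk_data *bulk,
					  int count, const char *name)
{
	int i;

	for (i = 0; i < count; i++)
		if (bulk[i].id && !strcmp(bulk[i].id, name))
			return bulk[i].clk;

	return NULL;
}
#endif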

static int a6xx_gmu_get_irq(struct a6xx_gmu *gmu, struct platform_device *pdev,
			    const char *name, irq_handler_t handler)
{
	int irq, ret;

	irq = platform_get_irq_byname(pdev, name);

	ret = request_irq(irq, handler, IRQF_TRIGGER_HIGH | IRQF_NO_AUTOEN, name, gmu);
	if (ret) {
		DRM_DEV_ERROR(&pdev->dev, "Unable to get interrupt %s %d\n",
			      name, ret);
		return ret;
	}

	return irq;
}
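
/*
 * Because the lines are requested with IRQF_NO_AUTOEN, they stay masked until
 * the caller explicitly arms them once the GMU is up. A minimal sketch of the
 * expected pairing (the placement is an assumption, not a quote of this
 * driver's resume/suspend paths):
 */
#if 0
	enable_irq(gmu->hfi_irq);
	enable_irq(gmu->gmu_irq);

	/* ... and on the way down ... */
	disable_irq(gmu->gmu_irq);
	disable_irq(gmu->hfi_irq);
#endif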

void a6xx_gmu_sysprof_setup(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned int sysprof_active;

	/* Nothing to do if the GPU is suspended. We will handle this during GMU resume */
	if (!pm_runtime_get_if_active(&gpu->pdev->dev))
		return;

	mutex_lock(&gmu->lock);

	sysprof_active = refcount_read(&gpu->sysprof_active);

	/*
	 * 'Perfcounter select' register values are lost during IFPC collapse. To avoid that,
	 * use the currently unused perfcounter OOB vote to block IFPC while sysprof is active
	 */
	if ((sysprof_active > 1) && !test_and_set_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status))
		a6xx_gmu_set_oob(gmu, GMU_OOB_PERFCOUNTER_SET);
	else if ((sysprof_active == 1) && test_and_clear_bit(GMU_STATUS_OOB_PERF_SET, &gmu->status))
		a6xx_gmu_clear_oob(gmu, GMU_OOB_PERFCOUNTER_SET);

	mutex_unlock(&gmu->lock);

	pm_runtime_put(&gpu->pdev->dev);
}

void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;

	mutex_lock(&gmu->lock);
	if (!gmu->initialized) {
		mutex_unlock(&gmu->lock);
		return;
	}

	gmu->initialized = false;

	mutex_unlock(&gmu->lock);

	pm_runtime_force_suspend(gmu->dev);

	/*
	 * Since cxpd is a virtual device, the devlink with gmu-dev will be
	 * removed automatically when we detach it
	 */
	dev_pm_domain_detach(gmu->cxpd, false);

	if (!IS_ERR_OR_NULL(gmu->gxpd)) {
		pm_runtime_disable(gmu->gxpd);
		dev_pm_domain_detach(gmu->gxpd, false);
	}

	if (!IS_ERR_OR_NULL(gmu->qmp))
		qmp_put(gmu->qmp);

	iounmap(gmu->mmio);
	gmu->mmio = NULL;
	gmu->rscc = NULL;

	if (!adreno_has_gmu_wrapper(adreno_gpu) &&
	    !adreno_has_rgmu(adreno_gpu)) {
		a6xx_gmu_memory_free(gmu);

		free_irq(gmu->gmu_irq, gmu);
		free_irq(gmu->hfi_irq, gmu);
	}

	/* Drop reference taken in of_find_device_by_node */
	put_device(gmu->dev);
}

static int cxpd_notifier_cb(struct notifier_block *nb,
			    unsigned long action, void *data)
{
	struct a6xx_gmu *gmu = container_of(nb, struct a6xx_gmu, pd_nb);

	if (action == GENPD_NOTIFY_OFF)
		complete_all(&gmu->pd_gate);

	return 0;
}
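
/*
 * Illustrative only: the notifier above pairs with the pd_gate completion so
 * a caller can wait for the CX domain to actually power off. A minimal usage
 * sketch (the timeout value and error message are assumptions):
 */
#if 0
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	reinit_completion(&gmu->pd_gate);

	/* ... trigger the power-down ... */

	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(10)))
		DRM_DEV_ERROR(gmu->dev, "CX GDSC did not collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);
#endif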

static void __iomem *a6xx_gmu_get_mmio(struct platform_device *pdev,
				       const char *name, resource_size_t *start)
{
	void __iomem *ret;
	struct resource *res = platform_get_resource_byname(pdev,
							     IORESOURCE_MEM, name);

	if (!res) {
		DRM_DEV_ERROR(&pdev->dev, "Unable to find the %s registers\n", name);
		return ERR_PTR(-EINVAL);
	}

	ret = ioremap(res->start, resource_size(res));
	if (!ret) {
		DRM_DEV_ERROR(&pdev->dev, "Unable to map the %s registers\n", name);
		return ERR_PTR(-EINVAL);
	}

	if (start)
		*start = res->start;

	return ret;
}

int a6xx_gmu_wrapper_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
{
	struct platform_device *pdev = of_find_device_by_node(node);
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	resource_size_t start;
	struct resource *res;
	int ret;

	if (!pdev)
		return -ENODEV;

	gmu->dev = &pdev->dev;

	ret = of_dma_configure(gmu->dev, node, true);
	if (ret)
		return ret;

	pm_runtime_enable(gmu->dev);

	/* Mark legacy for manual SPTPRAC control */
	gmu->legacy = true;

	/* RGMU requires clocks */
	ret = devm_clk_bulk_get_all(gmu->dev, &gmu->clocks);
	if (ret < 0)
		goto err_clk;

	gmu->nr_clocks = ret;

	/* Map the GMU registers */
	gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu", &start);
	if (IS_ERR(gmu->mmio)) {
		ret = PTR_ERR(gmu->mmio);
		goto err_mmio;
	}

	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, "kgsl_3d0_reg_memory");
	if (!res) {
		ret = -EINVAL;
		goto err_mmio;
	}

	/* Identify gmu base offset from gpu base address */
	gmu->mmio_offset = (u32)(start - res->start);

	gmu->cxpd = dev_pm_domain_attach_by_name(gmu->dev, "cx");
	if (IS_ERR(gmu->cxpd)) {
		ret = PTR_ERR(gmu->cxpd);
		goto err_mmio;
	}

	if (!device_link_add(gmu->dev, gmu->cxpd, DL_FLAG_PM_RUNTIME)) {
		ret = -ENODEV;
		goto detach_cxpd;
	}

	init_completion(&gmu->pd_gate);
	complete_all(&gmu->pd_gate);
	gmu->pd_nb.notifier_call = cxpd_notifier_cb;

	/* Get a link to the GX power domain to reset the GPU */
	gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx");
	if (IS_ERR(gmu->gxpd)) {
		ret = PTR_ERR(gmu->gxpd);
		goto err_mmio;
	}

	gmu->initialized = true;

	return 0;

detach_cxpd:
	dev_pm_domain_detach(gmu->cxpd, false);

err_mmio:
	iounmap(gmu->mmio);

err_clk:
	/* Drop reference taken in of_find_device_by_node */
	put_device(gmu->dev);

	return ret;
}

int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node)
{
	struct platform_device *pdev = of_find_device_by_node(node);
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	struct device_link *link;
	resource_size_t start;
	struct resource *res;
	int ret;

	if (!pdev)
		return -ENODEV;

	gmu->dev = &pdev->dev;

	ret = of_dma_configure(gmu->dev, node, true);
	if (ret)
		return ret;

	/* Set GMU idle level */
	gmu->idle_level = (adreno_gpu->info->quirks & ADRENO_QUIRK_IFPC) ?
		GMU_IDLE_STATE_IFPC : GMU_IDLE_STATE_ACTIVE;

	pm_runtime_enable(gmu->dev);

	/* Get the list of clocks */
	ret = a6xx_gmu_clocks_probe(gmu);
	if (ret)
		goto err_put_device;

	ret = a6xx_gmu_memory_probe(adreno_gpu->base.dev, gmu);
	if (ret)
		goto err_put_device;

	/*
	 * A660 now requires handling "prealloc requests" in GMU firmware.
	 * For now just hardcode allocations based on the known firmware.
	 * Note: there is no indication that these correspond to "dummy" or
	 * "debug" regions, but this "guess" allows reusing these BOs which
	 * are otherwise unused by a660.
	 */
	gmu->dummy.size = SZ_4K;
	if (adreno_is_a660_family(adreno_gpu) ||
	    adreno_is_a7xx(adreno_gpu) ||
	    adreno_is_a8xx(adreno_gpu)) {
		ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_4K * 7,
					    0x60400000, "debug");
		if (ret)
			goto err_memory;

		gmu->dummy.size = SZ_16K;
	}

	/* Allocate memory for the GMU dummy page */
	ret = a6xx_gmu_memory_alloc(gmu, &gmu->dummy, gmu->dummy.size,
				    0x60000000, "dummy");
	if (ret)
		goto err_memory;

	/* Note that a650 family also includes a660 family: */
	if (adreno_is_a650_family(adreno_gpu) ||
	    adreno_is_a7xx(adreno_gpu) ||
	    adreno_is_a8xx(adreno_gpu)) {
		ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
					    SZ_16M - SZ_16K, 0x04000, "icache");
		if (ret)
			goto err_memory;
		/*
		 * NOTE: when porting legacy ("pre-650-family") GPUs you may be
		 * tempted to add a condition to allocate icache/dcache here, as
		 * per the downstream code flow, but it may not actually be
		 * necessary. If you omit this step and you don't get random
		 * pagefaults, you are likely good to go without it!
		 */
	} else if (adreno_is_a640_family(adreno_gpu)) {
		ret = a6xx_gmu_memory_alloc(gmu, &gmu->icache,
					    SZ_256K - SZ_16K, 0x04000, "icache");
		if (ret)
			goto err_memory;

		ret = a6xx_gmu_memory_alloc(gmu, &gmu->dcache,
					    SZ_256K - SZ_16K, 0x44000, "dcache");
		if (ret)
			goto err_memory;
	} else if (adreno_is_a630_family(adreno_gpu)) {
		/* HFI v1, has sptprac */
		gmu->legacy = true;

		/* Allocate memory for the GMU debug region */
		ret = a6xx_gmu_memory_alloc(gmu, &gmu->debug, SZ_16K, 0, "debug");
		if (ret)
			goto err_memory;
	}

	/* Allocate memory for the GMU log region */
	ret = a6xx_gmu_memory_alloc(gmu, &gmu->log, SZ_16K, 0, "log");
	if (ret)
		goto err_memory;

	/* Allocate memory for the HFI queues */
	ret = a6xx_gmu_memory_alloc(gmu, &gmu->hfi, SZ_16K, 0, "hfi");
	if (ret)
		goto err_memory;

	/* Map the GMU registers */
	gmu->mmio = a6xx_gmu_get_mmio(pdev, "gmu", &start);
	if (IS_ERR(gmu->mmio)) {
		ret = PTR_ERR(gmu->mmio);
		goto err_memory;
	}

	res = platform_get_resource_byname(gpu->pdev, IORESOURCE_MEM, "kgsl_3d0_reg_memory");
	if (!res) {
		ret = -EINVAL;
		goto err_mmio;
	}

	/* Identify gmu base offset from gpu base address */
	gmu->mmio_offset = (u32)(start - res->start);

	if (adreno_is_a650_family(adreno_gpu) ||
	    adreno_is_a7xx(adreno_gpu)) {
		gmu->rscc = devm_platform_ioremap_resource_byname(pdev, "rscc");
		if (IS_ERR(gmu->rscc)) {
			ret = -ENODEV;
			goto err_mmio;
		}
	} else if (adreno_is_a8xx(adreno_gpu)) {
		gmu->rscc = gmu->mmio + 0x19000;
	} else {
		gmu->rscc = gmu->mmio + 0x23000;
	}

	/* Get the HFI and GMU interrupts */
	gmu->hfi_irq = a6xx_gmu_get_irq(gmu, pdev, "hfi", a6xx_hfi_irq);
	gmu->gmu_irq = a6xx_gmu_get_irq(gmu, pdev, "gmu", a6xx_gmu_irq);

	if (gmu->hfi_irq < 0 || gmu->gmu_irq < 0) {
		ret = -ENODEV;
		goto err_mmio;
	}

	gmu->cxpd = dev_pm_domain_attach_by_name(gmu->dev, "cx");
	if (IS_ERR(gmu->cxpd)) {
		ret = PTR_ERR(gmu->cxpd);
		goto err_mmio;
	}

	link = device_link_add(gmu->dev, gmu->cxpd, DL_FLAG_PM_RUNTIME);
	if (!link) {
		ret = -ENODEV;
		goto detach_cxpd;
	}

	/* Other errors are handled during GPU ACD probe */
	gmu->qmp = qmp_get(gmu->dev);
	if (PTR_ERR_OR_ZERO(gmu->qmp) == -EPROBE_DEFER) {
		ret = -EPROBE_DEFER;
		goto detach_gxpd;
	}

	init_completion(&gmu->pd_gate);
	complete_all(&gmu->pd_gate);
	gmu->pd_nb.notifier_call = cxpd_notifier_cb;

	/*
	 * Get a link to the GX power domain to reset the GPU in case of GMU
	 * crash
	 */
	gmu->gxpd = dev_pm_domain_attach_by_name(gmu->dev, "gx");

	/* Get the power levels for the GMU and GPU */
	a6xx_gmu_pwrlevels_probe(gmu);

	ret = a6xx_gmu_acd_probe(gmu);
	if (ret)
		goto detach_gxpd;

	/* Set up the HFI queues */
	a6xx_hfi_init(gmu);

	/* Initialize RPMh */
	a6xx_gmu_rpmh_init(gmu);

	gmu->initialized = true;

	return 0;

detach_gxpd:
	if (!IS_ERR_OR_NULL(gmu->gxpd))
		dev_pm_domain_detach(gmu->gxpd, false);

	if (!IS_ERR_OR_NULL(gmu->qmp))
		qmp_put(gmu->qmp);

	device_link_del(link);

detach_cxpd:
	dev_pm_domain_detach(gmu->cxpd, false);

err_mmio:
	iounmap(gmu->mmio);
	free_irq(gmu->gmu_irq, gmu);
	free_irq(gmu->hfi_irq, gmu);

err_memory:
	a6xx_gmu_memory_free(gmu);
err_put_device:
	/* Drop reference taken in of_find_device_by_node */
	put_device(gmu->dev);

	return ret;
}