// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR | \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB | \
	 A4XX_INT0_CP_OPCODE_ERROR | \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT | \
	 A4XX_INT0_CP_IB1_INT | \
	 A4XX_INT0_CP_IB2_INT | \
	 A4XX_INT0_CP_RB_INT | \
	 A4XX_INT0_CP_REG_PROTECT_FAULT | \
	 A4XX_INT0_CP_AHB_ERROR_HALT | \
	 A4XX_INT0_CACHE_FLUSH_TS | \
	 A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
{
	struct msm_ringbuffer *ring = submit->ring;
	unsigned int i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (gpu->cur_ctx_seqno == submit->queue->ctx->seqno)
				break;
			fallthrough;
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

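	/* Record the submit's sequence number in a CP scratch register so it
	 * shows up in the register dump printed by a4xx_recover() when
	 * debugging hangs:
	 */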
	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->seqno);

	/* Flush HLSQ lazy updates to make sure there is nothing
	 * pending for indirect loads after the timestamp has
	 * passed:
	 */
	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
	OUT_RING(ring, HLSQ_FLUSH);

	/* wait for idle before cache flush/interrupt */
	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
	OUT_RING(ring, 0x00000000);

	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS | CP_EVENT_WRITE_0_IRQ);
	OUT_RING(ring, rbmemptr(ring, fence));
	OUT_RING(ring, submit->seqno);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
}

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The msm_gpu pointer
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;

	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	/* No CCU for A405 */
	if (!adreno_is_a405(adreno_gpu)) {
		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
					0x00000922);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
					0x00000000);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
					0x00000001);
		}
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
	/* Early A430s have a timing issue with SP/TP power collapse;
	 * disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_patchid(adreno_gpu) < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
	return a4xx_idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

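	/* Per-SoC VBIF (GPU bus interface) tuning: QoS arbitration and
	 * read/write request limits differ between A405, A420 and A430:
	 */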
	if (adreno_is_a405(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
	}

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

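	/* GMEM lives in OCMEM on a4xx; the base address register takes the
	 * offset in 16KB granules, hence the shift by 14:
	 */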
	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem.base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries.. userspace may set
	 * this as well but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
				0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
				0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;

		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
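	/* ptr[0] holds the ucode version (printed above), so loading starts
	 * at ptr[1]:
	 */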
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

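	/* Pulse the software reset; the read back in between makes sure the
	 * assert has posted before the reset is deasserted:
	 */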
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

	kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
					A4XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);

		printk("CP | Protected mode error | %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};

static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

	return state;
}

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status: %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a4xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;

		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}

	return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}

	return 0;
}

static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
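	/* The timestamp comes from CP perf counter 0, which a4xx_hw_init()
	 * programs to the free-running CP_ALWAYS_COUNT countable:
	 */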
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);

	return 0;
}

static u64 a4xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
{
	u64 busy_cycles;

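	/* Report busy cycles from an RBBM perf counter along with the current
	 * core clock rate, so the caller can turn cycles into utilization:
	 */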
	busy_cycles = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_RBBM_1_LO);
	*out_sample_rate = clk_get_rate(gpu->core_clk);

	return busy_cycles;
}

static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
	return ring->memptrs->rptr;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.set_param = adreno_set_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = a4xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_busy = a4xx_gpu_busy,
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_create_address_space,
		.get_rptr = a4xx_get_rptr,
	},
	.get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct icc_path *ocmem_icc_path;
	struct icc_path *icc_path;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
							     a4xx_registers;

	/* if needed, allocate gmem: */
	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
				    &a4xx_gpu->ocmem);
	if (ret)
		goto fail;

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout. But the required
		 * registers are unknown. For now just bail out and
		 * limp along with just modesetting. If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		if (!allow_vram_carveout) {
			ret = -ENXIO;
			goto fail;
		}
	}

	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
	if (IS_ERR(icc_path)) {
		ret = PTR_ERR(icc_path);
		goto fail;
	}

	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
	if (IS_ERR(ocmem_icc_path)) {
		ret = PTR_ERR(ocmem_icc_path);
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			goto fail;
		ocmem_icc_path = NULL;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}