/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "adreno_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

#define RB_SIZE    SZ_32K /* ringbuffer size, in bytes */
#define RB_BLKSIZE 32     /* CP_RB_CNTL block size, in bytes */

int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	switch (param) {
	case MSM_PARAM_GPU_ID:
		*value = adreno_gpu->info->revn;
		return 0;
	case MSM_PARAM_GMEM_SIZE:
		*value = adreno_gpu->gmem;
		return 0;
	case MSM_PARAM_GMEM_BASE:
		*value = 0x100000;
		return 0;
	case MSM_PARAM_CHIP_ID:
		*value = adreno_gpu->rev.patchid |
				(adreno_gpu->rev.minor << 8) |
				(adreno_gpu->rev.major << 16) |
				(adreno_gpu->rev.core << 24);
		return 0;
	case MSM_PARAM_MAX_FREQ:
		*value = adreno_gpu->base.fast_rate;
		return 0;
	case MSM_PARAM_TIMESTAMP:
		if (adreno_gpu->funcs->get_timestamp)
			return adreno_gpu->funcs->get_timestamp(gpu, value);
		return -EINVAL;
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		return -EINVAL;
	}
}
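
/*
 * For reference, MSM_PARAM_CHIP_ID above packs the revision as
 * core.major.minor.patchid, one byte each, with core in the top byte.
 * A sketch of the matching decode on the userspace side (hypothetical
 * code, not part of this driver):
 *
 *	uint32_t chipid;   // value returned for MSM_PARAM_CHIP_ID
 *	uint8_t core    = (chipid >> 24) & 0xff;
 *	uint8_t major   = (chipid >> 16) & 0xff;
 *	uint8_t minor   = (chipid >>  8) & 0xff;
 *	uint8_t patchid =  chipid        & 0xff;
 *
 * e.g. an a330v2 (revision 3.3.0.2) reads back as 0x03030002.
 */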

int adreno_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	DBG("%s", gpu->name);

	ret = msm_gem_get_iova(gpu->rb->bo, gpu->id, &gpu->rb_iova);
	if (ret) {
		gpu->rb_iova = 0;
		dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret);
		return ret;
	}

	/* reset ringbuffer: */
	gpu->rb->cur = gpu->rb->start;

	/* reset completed fence seqno: */
	adreno_gpu->memptrs->fence = gpu->fctx->completed_fence;
	adreno_gpu->memptrs->rptr  = 0;
	adreno_gpu->memptrs->wptr  = 0;

	/* Setup REG_CP_RB_CNTL: */
	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
			/* size is log2(quad-words): */
			AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
			AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) |
			(adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0));

	/* Setup ringbuffer address: */
	adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE,
		REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova);

	if (!adreno_is_a430(adreno_gpu)) {
		adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
			REG_ADRENO_CP_RB_RPTR_ADDR_HI,
			rbmemptr(adreno_gpu, rptr));
	}

	return 0;
}
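
/*
 * Worked example for the CP_RB_CNTL programming above, using the
 * defaults from this file: RB_SIZE is 32KB, i.e. 4096 quad-words, so
 * BUFSZ = ilog2(4096) = 12; RB_BLKSIZE is 32 bytes, i.e. 4 quad-words,
 * so BLKSZ = ilog2(4) = 2.  (The assumption here, going by the field
 * names, is that BLKSZ sets the granularity at which the CP reports
 * rptr back to memory.)
 */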

static uint32_t get_wptr(struct msm_ringbuffer *ring)
{
	/* ring->cur and ring->start are u32 pointers, so this is in dwords: */
	return ring->cur - ring->start;
}

/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
static uint32_t get_rptr(struct adreno_gpu *adreno_gpu)
{
	if (adreno_is_a430(adreno_gpu))
		return adreno_gpu->memptrs->rptr = adreno_gpu_read(
			adreno_gpu, REG_ADRENO_CP_RB_RPTR);
	else
		return adreno_gpu->memptrs->rptr;
}

uint32_t adreno_last_fence(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	return adreno_gpu->memptrs->fence;
}

void adreno_recover(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	/* XXX pm-runtime??  we *need* the device to be off after this
	 * so maybe continuing to call ->pm_suspend/resume() is better?
	 */

	gpu->funcs->pm_suspend(gpu);
	gpu->funcs->pm_resume(gpu);

	ret = msm_gpu_hw_init(gpu);
	if (ret) {
		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
		/* hmm, oh well? */
	}
}

void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = gpu->rb;
	unsigned i;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == ctx)
				break;
			/* fall-thru */
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
				CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, submit->cmd[i].iova);
			OUT_RING(ring, submit->cmd[i].size);
			OUT_PKT2(ring);
			break;
		}
	}

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->fence->seqno);

	if (adreno_is_a3xx(adreno_gpu) || adreno_is_a4xx(adreno_gpu)) {
		/* Flush HLSQ lazy updates to make sure there is nothing
		 * pending for indirect loads after the timestamp has
		 * passed:
		 */
		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, HLSQ_FLUSH);

		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RING(ring, rbmemptr(adreno_gpu, fence));
	OUT_RING(ring, submit->fence->seqno);

	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

	/* Workaround for missing irq issue on 8x16/a306.  Unsure if the
	 * root cause is a platform issue or some a306 quirk, but this
	 * keeps things humming along:
	 */
	if (adreno_is_a306(adreno_gpu)) {
		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
		OUT_PKT3(ring, CP_INTERRUPT, 1);
		OUT_RING(ring, 0x80000000);
	}

#if 0
	if (adreno_is_a3xx(adreno_gpu)) {
		/* Dummy set-constant to trigger context rollover */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
		OUT_RING(ring, 0x00000000);
	}
#endif

	gpu->funcs->flush(gpu);
}
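
/*
 * For orientation, the tail that adreno_submit() emits after the cmd
 * buffers looks roughly like this in the ring (a3xx/a4xx case), as a
 * sketch reconstructed from the OUT_PKTn() calls above:
 *
 *	PKT0 CP_SCRATCH_REG2    <seqno>            ; scratch copy of fence
 *	PKT3 CP_EVENT_WRITE     HLSQ_FLUSH
 *	PKT3 CP_WAIT_FOR_IDLE   0x00000000
 *	PKT3 CP_EVENT_WRITE     CACHE_FLUSH_TS <fence-addr> <seqno>
 *	PKT3 CP_INTERRUPT       0x80000000
 *
 * The CACHE_FLUSH_TS event is what lands the seqno in memptrs->fence,
 * which adreno_last_fence() reads back.
 */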

void adreno_flush(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr;

	/*
	 * Mask the wptr value that we calculate to fit in the HW range.  This
	 * accounts for the possibility that the last command fit exactly into
	 * the ringbuffer and rb->cur hasn't wrapped to zero yet:
	 */
	wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1);

	/* ensure writes to ringbuffer have hit system memory: */
	mb();

	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr);
}
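
/*
 * Concrete example of the masking above: with RB_SIZE = 32KB the ring
 * holds 8192 dwords.  If the last command ends exactly at the end of
 * the buffer, get_wptr() returns 8192, and the (8192 - 1) mask folds
 * that back to 0, which is where the CP will wrap to next.
 */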

bool adreno_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr = get_wptr(gpu->rb);

	/* wait for CP to drain ringbuffer: */
	if (!spin_until(get_rptr(adreno_gpu) == wptr))
		return true;

	/* TODO maybe we need to reset GPU here to recover from hang? */
	DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
	return false;
}

#ifdef CONFIG_DEBUG_FS
void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	seq_printf(m, "fence:    %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->fctx->last_fence);
	seq_printf(m, "rptr:     %d\n", get_rptr(adreno_gpu));
	seq_printf(m, "wptr:     %d\n", adreno_gpu->memptrs->wptr);
	seq_printf(m, "rb wptr:  %d\n", get_wptr(gpu->rb));

	/* dump these out in a form that can be parsed by demsm: */
	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end   = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
		}
	}
}
#endif

/* Dump common gpu status and scratch registers on any hang, to make
 * the hangcheck logs more useful.  The scratch registers seem to
 * always be safe to read when the GPU has hung (unlike some other
 * regs, depending on how the GPU hung), and they are useful to match
 * up to cmdstream dumps when debugging hangs:
 */
void adreno_dump_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	printk("revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	printk("fence:    %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->fctx->last_fence);
	printk("rptr:     %d\n", get_rptr(adreno_gpu));
	printk("wptr:     %d\n", adreno_gpu->memptrs->wptr);
	printk("rb wptr:  %d\n", get_wptr(gpu->rb));
}

/* would be nice to not have to duplicate the _show() stuff with printk(): */
void adreno_dump(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	/* dump these out in a form that can be parsed by demsm: */
	printk("IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end   = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			printk("IO:R %08x %08x\n", addr<<2, val);
		}
	}
}
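
/*
 * Sample of the dump format emitted above (register values are just
 * illustrative):
 *
 *	IO:region A330 00000000 00020000
 *	IO:R 00001c40 00000001
 *
 * The offsets in adreno_gpu->registers are dword indices, hence the
 * addr<<2 to get byte offsets; the assumption is that demsm expects
 * byte-addressed registers so they match up with cmdstream dumps.
 */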

static uint32_t ring_freewords(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t size = gpu->rb->size / 4;
	uint32_t wptr = get_wptr(gpu->rb);
	uint32_t rptr = get_rptr(adreno_gpu);
	return (rptr + (size - 1) - wptr) % size;
}
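
/*
 * The modular arithmetic above deliberately keeps one dword in
 * reserve, so a full ring is distinguishable from an empty one.  E.g.
 * with size = 8192, rptr = 100, wptr = 90:
 *
 *	(100 + 8191 - 90) % 8192 = 9
 *
 * i.e. 9 free dwords rather than 10; wptr can approach rptr but never
 * catch up to it.
 */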

void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
{
	if (spin_until(ring_freewords(gpu) >= ndwords))
		DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
}

static const char *iommu_ports[] = {
		"gfx3d_user", "gfx3d_priv",
		"gfx3d1_user", "gfx3d1_priv",
};

int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs)
{
	struct adreno_platform_config *config = pdev->dev.platform_data;
	struct msm_gpu *gpu = &adreno_gpu->base;
	int ret;

	adreno_gpu->funcs = funcs;
	adreno_gpu->info = adreno_info(config->rev);
	adreno_gpu->gmem = adreno_gpu->info->gmem;
	adreno_gpu->revn = adreno_gpu->info->revn;
	adreno_gpu->rev = config->rev;

	gpu->fast_rate = config->fast_rate;
	gpu->bus_freq  = config->bus_freq;
#ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
	gpu->bus_scale_table = config->bus_scale_table;
#endif

	DBG("fast_rate=%u, slow_rate=27000000, bus_freq=%u",
			gpu->fast_rate, gpu->bus_freq);

	ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
			adreno_gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq",
			RB_SIZE);
	if (ret)
		return ret;

	pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD);
	pm_runtime_use_autosuspend(&pdev->dev);
	pm_runtime_enable(&pdev->dev);

	ret = request_firmware(&adreno_gpu->pm4, adreno_gpu->info->pm4fw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n",
				adreno_gpu->info->pm4fw, ret);
		return ret;
	}

	ret = request_firmware(&adreno_gpu->pfp, adreno_gpu->info->pfpfw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PFP firmware: %d\n",
				adreno_gpu->info->pfpfw, ret);
		return ret;
	}

	if (gpu->aspace && gpu->aspace->mmu) {
		struct msm_mmu *mmu = gpu->aspace->mmu;
		ret = mmu->funcs->attach(mmu, iommu_ports,
				ARRAY_SIZE(iommu_ports));
		if (ret)
			return ret;
	}

	mutex_lock(&drm->struct_mutex);
	adreno_gpu->memptrs_bo = msm_gem_new(drm, sizeof(*adreno_gpu->memptrs),
			MSM_BO_UNCACHED);
	mutex_unlock(&drm->struct_mutex);
	if (IS_ERR(adreno_gpu->memptrs_bo)) {
		ret = PTR_ERR(adreno_gpu->memptrs_bo);
		adreno_gpu->memptrs_bo = NULL;
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		return ret;
	}

	adreno_gpu->memptrs = msm_gem_get_vaddr(adreno_gpu->memptrs_bo);
	if (IS_ERR(adreno_gpu->memptrs)) {
		ret = PTR_ERR(adreno_gpu->memptrs);
		/* clear the ERR ptr so cleanup doesn't try to unmap it: */
		adreno_gpu->memptrs = NULL;
		dev_err(drm->dev, "could not vmap memptrs: %d\n", ret);
		return ret;
	}

	ret = msm_gem_get_iova(adreno_gpu->memptrs_bo, gpu->id,
			&adreno_gpu->memptrs_iova);
	if (ret) {
		dev_err(drm->dev, "could not map memptrs: %d\n", ret);
		return ret;
	}

	return 0;
}
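
/*
 * A minimal sketch of how a per-generation backend uses this, modeled
 * on a3xx_gpu.c (the funcs table contents are elided):
 *
 *	static const struct adreno_gpu_funcs funcs = {
 *		.base = {
 *			.hw_init = a3xx_hw_init,
 *			.submit = adreno_submit,
 *			...
 *		},
 *	};
 *
 *	ret = adreno_gpu_init(dev, pdev, &a3xx_gpu->base, &funcs);
 *
 * with adreno_gpu_cleanup() called from the backend's destroy hook.
 */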

void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
{
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (adreno_gpu->memptrs_bo) {
		if (adreno_gpu->memptrs)
			msm_gem_put_vaddr(adreno_gpu->memptrs_bo);

		if (adreno_gpu->memptrs_iova)
			msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->id);

		drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo);
	}
	release_firmware(adreno_gpu->pm4);
	release_firmware(adreno_gpu->pfp);

	msm_gpu_cleanup(gpu);

	if (gpu->aspace) {
		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
			iommu_ports, ARRAY_SIZE(iommu_ports));
		msm_gem_address_space_put(gpu->aspace);
	}
}
446