xref: /linux/drivers/gpu/drm/msm/adreno/a8xx_preempt.c (revision 0fc8f6200d2313278fbf4539bbab74677c685531)
1*a693602eSAkhil P Oommen // SPDX-License-Identifier: GPL-2.0
2*a693602eSAkhil P Oommen /* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */
3*a693602eSAkhil P Oommen 
4*a693602eSAkhil P Oommen #include "msm_gem.h"
5*a693602eSAkhil P Oommen #include "a6xx_gpu.h"
6*a693602eSAkhil P Oommen #include "a6xx_gmu.xml.h"
7*a693602eSAkhil P Oommen #include "a6xx_preempt.h"
8*a693602eSAkhil P Oommen #include "msm_mmu.h"
9*a693602eSAkhil P Oommen #include "msm_gpu_trace.h"
10*a693602eSAkhil P Oommen 
11*a693602eSAkhil P Oommen static void preempt_prepare_postamble(struct a6xx_gpu *a6xx_gpu)
12*a693602eSAkhil P Oommen {
13*a693602eSAkhil P Oommen 	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;
14*a693602eSAkhil P Oommen 	u32 count = 0;
15*a693602eSAkhil P Oommen 
16*a693602eSAkhil P Oommen 	postamble[count++] = PKT7(CP_REG_RMW, 3);
17*a693602eSAkhil P Oommen 	postamble[count++] = REG_A8XX_RBBM_PERFCTR_SRAM_INIT_CMD;
18*a693602eSAkhil P Oommen 	postamble[count++] = 0;
19*a693602eSAkhil P Oommen 	postamble[count++] = 1;
20*a693602eSAkhil P Oommen 
21*a693602eSAkhil P Oommen 	postamble[count++] = PKT7(CP_WAIT_REG_MEM, 6);
22*a693602eSAkhil P Oommen 	postamble[count++] = CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ);
23*a693602eSAkhil P Oommen 	postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_LO(
24*a693602eSAkhil P Oommen 				REG_A8XX_RBBM_PERFCTR_SRAM_INIT_STATUS);
25*a693602eSAkhil P Oommen 	postamble[count++] = CP_WAIT_REG_MEM_POLL_ADDR_HI(0);
26*a693602eSAkhil P Oommen 	postamble[count++] = CP_WAIT_REG_MEM_3_REF(0x1);
27*a693602eSAkhil P Oommen 	postamble[count++] = CP_WAIT_REG_MEM_4_MASK(0x1);
28*a693602eSAkhil P Oommen 	postamble[count++] = CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(0);
29*a693602eSAkhil P Oommen 
30*a693602eSAkhil P Oommen 	a6xx_gpu->preempt_postamble_len = count;
31*a693602eSAkhil P Oommen 
32*a693602eSAkhil P Oommen 	a6xx_gpu->postamble_enabled = true;
33*a693602eSAkhil P Oommen }
34*a693602eSAkhil P Oommen 
35*a693602eSAkhil P Oommen static void preempt_disable_postamble(struct a6xx_gpu *a6xx_gpu)
36*a693602eSAkhil P Oommen {
37*a693602eSAkhil P Oommen 	u32 *postamble = a6xx_gpu->preempt_postamble_ptr;
38*a693602eSAkhil P Oommen 
39*a693602eSAkhil P Oommen 	/*
40*a693602eSAkhil P Oommen 	 * Disable the postamble by replacing the first packet header with a NOP
41*a693602eSAkhil P Oommen 	 * that covers the whole buffer.
42*a693602eSAkhil P Oommen 	 */
43*a693602eSAkhil P Oommen 	*postamble = PKT7(CP_NOP, (a6xx_gpu->preempt_postamble_len - 1));
44*a693602eSAkhil P Oommen 
45*a693602eSAkhil P Oommen 	a6xx_gpu->postamble_enabled = false;
46*a693602eSAkhil P Oommen }
47*a693602eSAkhil P Oommen 
48*a693602eSAkhil P Oommen /*
49*a693602eSAkhil P Oommen  * Set preemption keepalive vote. Please note that this vote is different from the one used in
50*a693602eSAkhil P Oommen  * a8xx_irq()
51*a693602eSAkhil P Oommen  */
52*a693602eSAkhil P Oommen static void a8xx_preempt_keepalive_vote(struct msm_gpu *gpu, bool on)
53*a693602eSAkhil P Oommen {
54*a693602eSAkhil P Oommen 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
55*a693602eSAkhil P Oommen 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
56*a693602eSAkhil P Oommen 
57*a693602eSAkhil P Oommen 	gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_PWR_COL_PREEMPT_KEEPALIVE, on);
58*a693602eSAkhil P Oommen }
59*a693602eSAkhil P Oommen 
60*a693602eSAkhil P Oommen void a8xx_preempt_irq(struct msm_gpu *gpu)
61*a693602eSAkhil P Oommen {
62*a693602eSAkhil P Oommen 	uint32_t status;
63*a693602eSAkhil P Oommen 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
64*a693602eSAkhil P Oommen 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
65*a693602eSAkhil P Oommen 	struct drm_device *dev = gpu->dev;
66*a693602eSAkhil P Oommen 
67*a693602eSAkhil P Oommen 	if (!try_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING))
68*a693602eSAkhil P Oommen 		return;
69*a693602eSAkhil P Oommen 
70*a693602eSAkhil P Oommen 	/* Delete the preemption watchdog timer */
71*a693602eSAkhil P Oommen 	timer_delete(&a6xx_gpu->preempt_timer);
72*a693602eSAkhil P Oommen 
73*a693602eSAkhil P Oommen 	/*
74*a693602eSAkhil P Oommen 	 * The hardware should be setting the stop bit of CP_CONTEXT_SWITCH_CNTL
75*a693602eSAkhil P Oommen 	 * to zero before firing the interrupt, but there is a non zero chance
76*a693602eSAkhil P Oommen 	 * of a hardware condition or a software race that could set it again
77*a693602eSAkhil P Oommen 	 * before we have a chance to finish. If that happens, log and go for
78*a693602eSAkhil P Oommen 	 * recovery
79*a693602eSAkhil P Oommen 	 */
80*a693602eSAkhil P Oommen 	status = gpu_read(gpu, REG_A8XX_CP_CONTEXT_SWITCH_CNTL);
81*a693602eSAkhil P Oommen 	if (unlikely(status & A8XX_CP_CONTEXT_SWITCH_CNTL_STOP)) {
82*a693602eSAkhil P Oommen 		DRM_DEV_ERROR(&gpu->pdev->dev,
83*a693602eSAkhil P Oommen 					  "!!!!!!!!!!!!!!!! preemption faulted !!!!!!!!!!!!!! irq\n");
84*a693602eSAkhil P Oommen 		set_preempt_state(a6xx_gpu, PREEMPT_FAULTED);
85*a693602eSAkhil P Oommen 		dev_err(dev->dev, "%s: Preemption failed to complete\n",
86*a693602eSAkhil P Oommen 			gpu->name);
87*a693602eSAkhil P Oommen 		kthread_queue_work(gpu->worker, &gpu->recover_work);
88*a693602eSAkhil P Oommen 		return;
89*a693602eSAkhil P Oommen 	}
90*a693602eSAkhil P Oommen 
91*a693602eSAkhil P Oommen 	a6xx_gpu->cur_ring = a6xx_gpu->next_ring;
92*a693602eSAkhil P Oommen 	a6xx_gpu->next_ring = NULL;
93*a693602eSAkhil P Oommen 
94*a693602eSAkhil P Oommen 	set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
95*a693602eSAkhil P Oommen 
96*a693602eSAkhil P Oommen 	update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
97*a693602eSAkhil P Oommen 
98*a693602eSAkhil P Oommen 	set_preempt_state(a6xx_gpu, PREEMPT_NONE);
99*a693602eSAkhil P Oommen 
100*a693602eSAkhil P Oommen 	a8xx_preempt_keepalive_vote(gpu, false);
101*a693602eSAkhil P Oommen 
102*a693602eSAkhil P Oommen 	trace_msm_gpu_preemption_irq(a6xx_gpu->cur_ring->id);
103*a693602eSAkhil P Oommen 
104*a693602eSAkhil P Oommen 	/*
105*a693602eSAkhil P Oommen 	 * Retrigger preemption to avoid a deadlock that might occur when preemption
106*a693602eSAkhil P Oommen 	 * is skipped due to it being already in flight when requested.
107*a693602eSAkhil P Oommen 	 */
108*a693602eSAkhil P Oommen 	a8xx_preempt_trigger(gpu);
109*a693602eSAkhil P Oommen }
110*a693602eSAkhil P Oommen 
111*a693602eSAkhil P Oommen void a8xx_preempt_hw_init(struct msm_gpu *gpu)
112*a693602eSAkhil P Oommen {
113*a693602eSAkhil P Oommen 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
114*a693602eSAkhil P Oommen 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
115*a693602eSAkhil P Oommen 	int i;
116*a693602eSAkhil P Oommen 
117*a693602eSAkhil P Oommen 	/* No preemption if we only have one ring */
118*a693602eSAkhil P Oommen 	if (gpu->nr_rings == 1)
119*a693602eSAkhil P Oommen 		return;
120*a693602eSAkhil P Oommen 
121*a693602eSAkhil P Oommen 	for (i = 0; i < gpu->nr_rings; i++) {
122*a693602eSAkhil P Oommen 		struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[i];
123*a693602eSAkhil P Oommen 
124*a693602eSAkhil P Oommen 		record_ptr->wptr = 0;
125*a693602eSAkhil P Oommen 		record_ptr->rptr = 0;
126*a693602eSAkhil P Oommen 		record_ptr->rptr_addr = shadowptr(a6xx_gpu, gpu->rb[i]);
127*a693602eSAkhil P Oommen 		record_ptr->info = 0;
128*a693602eSAkhil P Oommen 		record_ptr->data = 0;
129*a693602eSAkhil P Oommen 		record_ptr->rbase = gpu->rb[i]->iova;
130*a693602eSAkhil P Oommen 	}
131*a693602eSAkhil P Oommen 
132*a693602eSAkhil P Oommen 	/* Write a 0 to signal that we aren't switching pagetables */
133*a693602eSAkhil P Oommen 	gpu_write64(gpu, REG_A8XX_CP_CONTEXT_SWITCH_SMMU_INFO, 0);
134*a693602eSAkhil P Oommen 
135*a693602eSAkhil P Oommen 	/* Enable the GMEM save/restore feature for preemption */
136*a693602eSAkhil P Oommen 	gpu_write(gpu, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 0x1);
137*a693602eSAkhil P Oommen 
138*a693602eSAkhil P Oommen 	/* Reset the preemption state */
139*a693602eSAkhil P Oommen 	set_preempt_state(a6xx_gpu, PREEMPT_NONE);
140*a693602eSAkhil P Oommen 
141*a693602eSAkhil P Oommen 	spin_lock_init(&a6xx_gpu->eval_lock);
142*a693602eSAkhil P Oommen 
143*a693602eSAkhil P Oommen 	/* Always come up on rb 0 */
144*a693602eSAkhil P Oommen 	a6xx_gpu->cur_ring = gpu->rb[0];
145*a693602eSAkhil P Oommen }
146*a693602eSAkhil P Oommen 
147*a693602eSAkhil P Oommen void a8xx_preempt_trigger(struct msm_gpu *gpu)
148*a693602eSAkhil P Oommen {
149*a693602eSAkhil P Oommen 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
150*a693602eSAkhil P Oommen 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
151*a693602eSAkhil P Oommen 	unsigned long flags;
152*a693602eSAkhil P Oommen 	struct msm_ringbuffer *ring;
153*a693602eSAkhil P Oommen 	unsigned int cntl;
154*a693602eSAkhil P Oommen 	bool sysprof;
155*a693602eSAkhil P Oommen 
156*a693602eSAkhil P Oommen 	if (gpu->nr_rings == 1)
157*a693602eSAkhil P Oommen 		return;
158*a693602eSAkhil P Oommen 
159*a693602eSAkhil P Oommen 	/*
160*a693602eSAkhil P Oommen 	 * Lock to make sure another thread attempting preemption doesn't skip it
161*a693602eSAkhil P Oommen 	 * while we are still evaluating the next ring. This makes sure the other
162*a693602eSAkhil P Oommen 	 * thread does start preemption if we abort it and avoids a soft lock.
163*a693602eSAkhil P Oommen 	 */
164*a693602eSAkhil P Oommen 	spin_lock_irqsave(&a6xx_gpu->eval_lock, flags);
165*a693602eSAkhil P Oommen 
166*a693602eSAkhil P Oommen 	/*
167*a693602eSAkhil P Oommen 	 * Try to start preemption by moving from NONE to START. If
168*a693602eSAkhil P Oommen 	 * unsuccessful, a preemption is already in flight
169*a693602eSAkhil P Oommen 	 */
170*a693602eSAkhil P Oommen 	if (!try_preempt_state(a6xx_gpu, PREEMPT_NONE, PREEMPT_START)) {
171*a693602eSAkhil P Oommen 		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
172*a693602eSAkhil P Oommen 		return;
173*a693602eSAkhil P Oommen 	}
174*a693602eSAkhil P Oommen 
175*a693602eSAkhil P Oommen 	cntl = A8XX_CP_CONTEXT_SWITCH_CNTL_LEVEL(a6xx_gpu->preempt_level);
176*a693602eSAkhil P Oommen 
177*a693602eSAkhil P Oommen 	if (a6xx_gpu->skip_save_restore)
178*a693602eSAkhil P Oommen 		cntl |= A8XX_CP_CONTEXT_SWITCH_CNTL_SKIP_SAVE_RESTORE;
179*a693602eSAkhil P Oommen 
180*a693602eSAkhil P Oommen 	if (a6xx_gpu->uses_gmem)
181*a693602eSAkhil P Oommen 		cntl |= A8XX_CP_CONTEXT_SWITCH_CNTL_USES_GMEM;
182*a693602eSAkhil P Oommen 
183*a693602eSAkhil P Oommen 	cntl |= A8XX_CP_CONTEXT_SWITCH_CNTL_STOP;
184*a693602eSAkhil P Oommen 
185*a693602eSAkhil P Oommen 	/* Get the next ring to preempt to */
186*a693602eSAkhil P Oommen 	ring = get_next_ring(gpu);
187*a693602eSAkhil P Oommen 
188*a693602eSAkhil P Oommen 	/*
189*a693602eSAkhil P Oommen 	 * If no ring is populated or the highest priority ring is the current
190*a693602eSAkhil P Oommen 	 * one do nothing except to update the wptr to the latest and greatest
191*a693602eSAkhil P Oommen 	 */
192*a693602eSAkhil P Oommen 	if (!ring || (a6xx_gpu->cur_ring == ring)) {
193*a693602eSAkhil P Oommen 		set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
194*a693602eSAkhil P Oommen 		update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
195*a693602eSAkhil P Oommen 		set_preempt_state(a6xx_gpu, PREEMPT_NONE);
196*a693602eSAkhil P Oommen 		spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
197*a693602eSAkhil P Oommen 		return;
198*a693602eSAkhil P Oommen 	}
199*a693602eSAkhil P Oommen 
200*a693602eSAkhil P Oommen 	spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
201*a693602eSAkhil P Oommen 
202*a693602eSAkhil P Oommen 	spin_lock_irqsave(&ring->preempt_lock, flags);
203*a693602eSAkhil P Oommen 
204*a693602eSAkhil P Oommen 	struct a7xx_cp_smmu_info *smmu_info_ptr =
205*a693602eSAkhil P Oommen 		a6xx_gpu->preempt_smmu[ring->id];
206*a693602eSAkhil P Oommen 	struct a6xx_preempt_record *record_ptr = a6xx_gpu->preempt[ring->id];
207*a693602eSAkhil P Oommen 	u64 ttbr0 = ring->memptrs->ttbr0;
208*a693602eSAkhil P Oommen 	u32 context_idr = ring->memptrs->context_idr;
209*a693602eSAkhil P Oommen 
210*a693602eSAkhil P Oommen 	smmu_info_ptr->ttbr0 = ttbr0;
211*a693602eSAkhil P Oommen 	smmu_info_ptr->context_idr = context_idr;
212*a693602eSAkhil P Oommen 	record_ptr->wptr = get_wptr(ring);
213*a693602eSAkhil P Oommen 
214*a693602eSAkhil P Oommen 	/*
215*a693602eSAkhil P Oommen 	 * The GPU will write the wptr we set above when we preempt. Reset
216*a693602eSAkhil P Oommen 	 * restore_wptr to make sure that we don't write WPTR to the same
217*a693602eSAkhil P Oommen 	 * thing twice. It's still possible subsequent submissions will update
218*a693602eSAkhil P Oommen 	 * wptr again, in which case they will set the flag to true. This has
219*a693602eSAkhil P Oommen 	 * to be protected by the lock for setting the flag and updating wptr
220*a693602eSAkhil P Oommen 	 * to be atomic.
221*a693602eSAkhil P Oommen 	 */
222*a693602eSAkhil P Oommen 	ring->restore_wptr = false;
223*a693602eSAkhil P Oommen 
224*a693602eSAkhil P Oommen 	trace_msm_gpu_preemption_trigger(a6xx_gpu->cur_ring->id, ring->id);
225*a693602eSAkhil P Oommen 
226*a693602eSAkhil P Oommen 	spin_unlock_irqrestore(&ring->preempt_lock, flags);
227*a693602eSAkhil P Oommen 
228*a693602eSAkhil P Oommen 	/* Set the keepalive bit to keep the GPU ON until preemption is complete */
229*a693602eSAkhil P Oommen 	a8xx_preempt_keepalive_vote(gpu, true);
230*a693602eSAkhil P Oommen 
231*a693602eSAkhil P Oommen 	a6xx_fenced_write(a6xx_gpu,
232*a693602eSAkhil P Oommen 		REG_A8XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id],
233*a693602eSAkhil P Oommen 		BIT(1), true);
234*a693602eSAkhil P Oommen 
235*a693602eSAkhil P Oommen 	a6xx_fenced_write(a6xx_gpu,
236*a693602eSAkhil P Oommen 		REG_A8XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
237*a693602eSAkhil P Oommen 		a6xx_gpu->preempt_iova[ring->id], BIT(1), true);
238*a693602eSAkhil P Oommen 
239*a693602eSAkhil P Oommen 	a6xx_gpu->next_ring = ring;
240*a693602eSAkhil P Oommen 
241*a693602eSAkhil P Oommen 	/* Start a timer to catch a stuck preemption */
242*a693602eSAkhil P Oommen 	mod_timer(&a6xx_gpu->preempt_timer, jiffies + msecs_to_jiffies(10000));
243*a693602eSAkhil P Oommen 
244*a693602eSAkhil P Oommen 	/* Enable or disable postamble as needed */
245*a693602eSAkhil P Oommen 	sysprof = refcount_read(&a6xx_gpu->base.base.sysprof_active) > 1;
246*a693602eSAkhil P Oommen 
247*a693602eSAkhil P Oommen 	if (!sysprof && !a6xx_gpu->postamble_enabled)
248*a693602eSAkhil P Oommen 		preempt_prepare_postamble(a6xx_gpu);
249*a693602eSAkhil P Oommen 
250*a693602eSAkhil P Oommen 	if (sysprof && a6xx_gpu->postamble_enabled)
251*a693602eSAkhil P Oommen 		preempt_disable_postamble(a6xx_gpu);
252*a693602eSAkhil P Oommen 
253*a693602eSAkhil P Oommen 	/* Set the preemption state to triggered */
254*a693602eSAkhil P Oommen 	set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);
255*a693602eSAkhil P Oommen 
256*a693602eSAkhil P Oommen 	/* Trigger the preemption */
257*a693602eSAkhil P Oommen 	a6xx_fenced_write(a6xx_gpu, REG_A8XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false);
258*a693602eSAkhil P Oommen }
259*a693602eSAkhil P Oommen 
260