xref: /linux/drivers/gpu/drm/amd/amdgpu/vpe_v2_0.c (revision e5565e7c3fada97f8b95c9704df80db85feafea4)
1 /*
2  * Copyright 2025 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include <linux/firmware.h>
24 #include "amdgpu.h"
25 #include "amdgpu_ucode.h"
26 #include "amdgpu_vpe.h"
27 #include "vpe_v2_0.h"
28 #include "soc15_common.h"
29 #include "ivsrcid/vpe/irqsrcs_vpe_6_1.h"
30 #include "vpe/vpe_2_0_0_offset.h"
31 #include "vpe/vpe_2_0_0_sh_mask.h"
32 
33 MODULE_FIRMWARE("amdgpu/vpe_2_0_0.bin");
34 MODULE_FIRMWARE("amdgpu/vpe_2_2_0.bin");
35 
36 #define VPE_THREAD1_UCODE_OFFSET	0x8000
37 
38 static uint32_t vpe_v2_0_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset)
39 {
40 	uint32_t base;
41 
42 	base = vpe->ring.adev->reg_offset[VPE_HWIP][inst][0];
43 
44 	return base + offset;
45 }
46 
47 static int vpe_v2_0_irq_init(struct amdgpu_vpe *vpe)
48 {
49 	struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe);
50 	int ret;
51 
52 	ret = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VPE,
53 				VPE_6_1_SRCID__VPE_TRAP,
54 				&adev->vpe.trap_irq);
55 	if (ret)
56 		return ret;
57 
58 	return 0;
59 }
60 
61 static int vpe_v2_0_load_microcode(struct amdgpu_vpe *vpe)
62 {
63 	struct amdgpu_device *adev = vpe->ring.adev;
64 	const struct vpe_firmware_header_v1_0 *vpe_hdr;
65 	const __le32 *data;
66 	uint32_t ucode_offset[2], ucode_size[2], size_dw, ret;
67 	uint32_t f32_offset, f32_cntl, reg_data;
68 
69 	ret = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL));
70 	ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0);
71 	WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), ret);
72 
73 	reg_data = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL2));
74 	reg_data = REG_SET_FIELD(reg_data, VPEC_CNTL2, IB_FIFO_WATERMARK, 1);
75 	WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL2), reg_data);
76 
77 	if (amdgpu_vpe_configure_dpm(vpe))
78 		dev_warn(adev->dev, "VPE DPM not enabled.\n");
79 
80 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
81 
82 		f32_offset = vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL);
83 		f32_cntl = RREG32(f32_offset);
84 		f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, 0);
85 		f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, 0);
86 
87 		adev->vpe.cmdbuf_cpu_addr[0] = f32_offset;
88 		adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl;
89 
90 		return amdgpu_vpe_psp_update_sram(adev);
91 	}
92 
93 	/* Halt and Check F32 cleaness */
94 	f32_offset = vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL);
95 	f32_cntl = RREG32(f32_offset);
96 	f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, 1);
97 	f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, 1);
98 	f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_CHECKSUM_CLR, 1);
99 	f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH0_CHECKSUM_CLR, 1);
100 	WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL), f32_cntl);
101 
102 	f32_cntl = RREG32(f32_offset);
103 	if (!REG_GET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT)) {
104 		dev_err(adev->dev, "VPEC is not halted");
105 		return -EBUSY;
106 	}
107 
108 	f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_CHECKSUM_CLR, 0);
109 	f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH0_CHECKSUM_CLR, 0);
110 	WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL), f32_cntl);
111 
112 	reg_data = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_CHECKSUM));
113 	if (reg_data) {
114 		dev_err(adev->dev, "VPE FW checksum 0 not clean");
115 		return -EBUSY;
116 	}
117 	reg_data = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE1_CHECKSUM));
118 	if (reg_data) {
119 		dev_err(adev->dev, "VPE FW checksum 1 not clean");
120 		return -EBUSY;
121 	}
122 
123 	reg_data = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_STATUS2));
124 	if (REG_GET_FIELD(reg_data, VPEC_STATUS2, TH0F32_INSTR_PTR)) {
125 		dev_err(adev->dev, "VPE FW initial status not clean");
126 		return -EBUSY;
127 	}
128 
129 	reg_data = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_STATUS6));
130 	if (REG_GET_FIELD(reg_data, VPEC_STATUS6, TH1F32_INSTR_PTR)) {
131 		dev_err(adev->dev, "VPE FW initial status not clean");
132 		return -EBUSY;
133 	}
134 	/* end of F32 cleaness check */
135 
136 	vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data;
137 
138 	/* Thread 0(command thread) ucode offset/size */
139 	ucode_offset[0] = le32_to_cpu(vpe_hdr->header.ucode_array_offset_bytes);
140 	ucode_size[0] = le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes);
141 	/* Thread 1(control thread) ucode offset/size */
142 	ucode_offset[1] = le32_to_cpu(vpe_hdr->ctl_ucode_offset);
143 	ucode_size[1] = le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes);
144 
145 	reg_data = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_PG_CNTL));
146 	reg_data = REG_SET_FIELD(reg_data, VPEC_PG_CNTL, PG_EN, 0);
147 	WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_PG_CNTL), reg_data);
148 
149 	for (int j = 0; j < vpe->num_instances; j++) {
150 		for (int i = 0; i < 2; i++) {
151 			if (i > 0)
152 				WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET);
153 			else
154 				WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), 0);
155 
156 			data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]);
157 			size_dw = ucode_size[i] / sizeof(__le32);
158 
159 			while (size_dw--) {
160 				if (amdgpu_emu_mode && size_dw % 500 == 0)
161 					msleep(1);
162 				WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_DATA), le32_to_cpup(data++));
163 			}
164 		}
165 	}
166 
167 	reg_data = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_PG_CNTL));
168 	reg_data = REG_SET_FIELD(reg_data, VPEC_PG_CNTL, PG_EN, 1);
169 	WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_PG_CNTL), reg_data);
170 
171 	/* Unhalt F32 */
172 	f32_cntl = RREG32(f32_offset);
173 	f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, 0);
174 	f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, 0);
175 	WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL), f32_cntl);
176 
177 	return 0;
178 }
179 
180 static int vpe_v2_0_ring_start(struct amdgpu_vpe *vpe)
181 {
182 	struct amdgpu_ring *ring = &vpe->ring;
183 	struct amdgpu_device *adev = ring->adev;
184 	uint32_t doorbell, doorbell_offset;
185 	uint32_t rb_bufsz, rb_cntl;
186 	uint32_t ib_cntl, i;
187 	int ret;
188 
189 	for (i = 0; i < vpe->num_instances; i++) {
190 		/* Set ring buffer size in dwords */
191 		rb_bufsz = order_base_2(ring->ring_size / 4);
192 		rb_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL));
193 		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
194 		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1);
195 		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0);
196 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl);
197 
198 		/* Initialize the ring buffer's read and write pointers */
199 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR), 0);
200 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_HI), 0);
201 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), 0);
202 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), 0);
203 
204 		/* set the wb address whether it's enabled or not */
205 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_LO),
206 			lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
207 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_HI),
208 			upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
209 
210 		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
211 
212 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
213 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
214 
215 		ring->wptr = 0;
216 
217 		/* before programing wptr to a less value, need set minor_ptr_update first */
218 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1);
219 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
220 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
221 		/* set minor_ptr_update to 0 after wptr programed */
222 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0);
223 
224 		doorbell_offset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET));
225 		doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index + i*4);
226 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
227 
228 		doorbell = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL));
229 		doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 1 : 0);
230 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL), doorbell);
231 
232 		adev->nbio.funcs->vpe_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index + i*4, 4);
233 
234 		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
235 		rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1);
236 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl);
237 
238 		ib_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL));
239 		ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1);
240 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL), ib_cntl);
241 	}
242 
243 	ret = amdgpu_ring_test_helper(ring);
244 	if (ret)
245 		return ret;
246 
247 	return 0;
248 }
249 
250 static int vpe_v2_0_ring_stop(struct amdgpu_vpe *vpe)
251 {
252 	struct amdgpu_device *adev = vpe->ring.adev;
253 	uint32_t queue_reset, i;
254 	int ret;
255 
256 	for (i = 0; i < vpe->num_instances; i++) {
257 		queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ));
258 
259 		queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1);
260 
261 		WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ), queue_reset);
262 		/* timeout length is adev->timeout_usec */
263 		ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ, 0,
264 					 VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK);
265 
266 		if (ret)
267 			dev_err(adev->dev, "VPE queue reset failed\n");
268 	}
269 
270 	vpe->ring.sched.ready = false;
271 
272 	return ret;
273 }
274 
275 static int vpe_v2_0_set_trap_irq_state(struct amdgpu_device *adev,
276 				       struct amdgpu_irq_src *source,
277 				       unsigned int type,
278 				       enum amdgpu_interrupt_state state)
279 {
280 	struct amdgpu_vpe *vpe = &adev->vpe;
281 	uint32_t vpe_cntl;
282 
283 	vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL));
284 	vpe_cntl = REG_SET_FIELD(vpe_cntl, VPEC_CNTL, TRAP_ENABLE,
285 				 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
286 
287 	WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl);
288 
289 	return 0;
290 }
291 
292 static int vpe_v2_0_process_trap_irq(struct amdgpu_device *adev,
293 				     struct amdgpu_irq_src *source,
294 				     struct amdgpu_iv_entry *entry)
295 {
296 
297 	DRM_DEBUG("IH: VPE trap\n");
298 
299 	switch (entry->client_id) {
300 	case SOC21_IH_CLIENTID_VPE:
301 		amdgpu_fence_process(&adev->vpe.ring);
302 		break;
303 	default:
304 		break;
305 	}
306 
307 	return 0;
308 }
309 
310 static int vpe_v2_0_set_regs(struct amdgpu_vpe *vpe)
311 {
312 	vpe->regs.queue0_rb_rptr_lo = regVPEC_QUEUE0_RB_RPTR;
313 	vpe->regs.queue0_rb_rptr_hi = regVPEC_QUEUE0_RB_RPTR_HI;
314 	vpe->regs.queue0_rb_wptr_lo = regVPEC_QUEUE0_RB_WPTR;
315 	vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI;
316 	vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT;
317 	vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2;
318 
319 	vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4;
320 	vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3;
321 	vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4;
322 	vpe->regs.dpm_busy_clamp_threshold = regVPEC_QUEUE7_DUMMY2;
323 	vpe->regs.dpm_idle_clamp_threshold = regVPEC_QUEUE7_DUMMY3;
324 	vpe->regs.dpm_request_lv = regVPEC_QUEUE7_DUMMY1;
325 	vpe->regs.context_indicator = regVPEC_QUEUE6_DUMMY3;
326 
327 	return 0;
328 }
329 
330 static struct vpe_funcs vpe_v2_0_funcs = {
331 	.get_reg_offset = vpe_v2_0_get_reg_offset,
332 	.set_regs = vpe_v2_0_set_regs,
333 	.irq_init = vpe_v2_0_irq_init,
334 	.init_microcode = amdgpu_vpe_init_microcode,
335 	.load_microcode = vpe_v2_0_load_microcode,
336 	.ring_init = amdgpu_vpe_ring_init,
337 	.ring_start = vpe_v2_0_ring_start,
338 	.ring_stop = vpe_v2_0_ring_stop,
339 	.ring_fini = amdgpu_vpe_ring_fini,
340 };
341 
342 static const struct amdgpu_irq_src_funcs vpe_v2_0_trap_irq_funcs = {
343 	.set = vpe_v2_0_set_trap_irq_state,
344 	.process = vpe_v2_0_process_trap_irq,
345 };
346 
347 void vpe_v2_0_set_funcs(struct amdgpu_vpe *vpe)
348 {
349 	vpe->funcs = &vpe_v2_0_funcs;
350 	vpe->trap_irq.funcs = &vpe_v2_0_trap_irq_funcs;
351 }
352