xref: /linux/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c (revision 92c4c9fdc838d3b41a996bb700ea64b9e78fc7ea)
1 /*
2  * Copyright 2025 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 
29 #include "amdgpu.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_trace.h"
32 
33 #include "gc/gc_12_1_0_offset.h"
34 #include "gc/gc_12_1_0_sh_mask.h"
35 #include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h"
36 
37 #include "soc15_common.h"
38 #include "soc15.h"
39 #include "sdma_v7_1_0_pkt_open.h"
40 #include "nbio_v4_3.h"
41 #include "sdma_common.h"
42 #include "sdma_v7_1.h"
43 #include "v12_structs.h"
44 #include "mes_userqueue.h"
45 #include "soc_v1_0.h"
46 
47 MODULE_FIRMWARE("amdgpu/sdma_7_1_0.bin");
48 
49 #define SDMA1_REG_OFFSET 0x600
50 #define SDMA0_SDMA_IDX_0_END 0x450
51 #define SDMA1_HYP_DEC_REG_OFFSET 0x30
52 
/* Register set captured per SDMA instance when dumping IP state for debug
 * (engine/queue status, UTCL1 fault info, and per-queue ring/IB pointers).
 */
static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_1[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS1_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS2_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS3_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS4_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS5_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS6_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UCODE_REV),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK1),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK1),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_INT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_VM_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_CHICKEN_BITS),
};
110 
111 static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev);
112 static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev);
113 static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev);
114 static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
115 				uint32_t inst_mask);
116 
/*
 * Translate an SDMA register offset into the absolute register offset for a
 * given logical instance.  Offsets at or above SDMA0_SDMA_IDX_0_END live in
 * the HYP_DEC aperture (reg_offset segment 1) with a smaller per-instance
 * stride; everything below uses segment 0 with the regular stride.
 */
static u32 sdma_v7_1_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
	u32 dev_inst = GET_INST(SDMA0, instance);
	int xcc_id = adev->sdma.instance[instance].xcc_id;
	int xcc_inst = dev_inst % adev->sdma.num_inst_per_xcc;
	bool hyp_dec = internal_offset >= SDMA0_SDMA_IDX_0_END;
	u32 seg_base = adev->reg_offset[GC_HWIP][xcc_id][hyp_dec ? 1 : 0];
	u32 inst_stride = hyp_dec ? SDMA1_HYP_DEC_REG_OFFSET : SDMA1_REG_OFFSET;

	return seg_base + internal_offset + inst_stride * xcc_inst;
}
136 
/*
 * Emit a COND_EXE packet with a placeholder execution count and return the
 * ring offset of that placeholder so the caller can patch it later.
 */
static unsigned sdma_v7_1_ring_init_cond_exec(struct amdgpu_ring *ring,
					      uint64_t addr)
{
	unsigned patch_offset;

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, 1);

	/* record where the dummy count dword lands, then emit it */
	patch_offset = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0);

	return patch_offset;
}
153 
154 /**
155  * sdma_v7_1_ring_get_rptr - get the current read pointer
156  *
157  * @ring: amdgpu ring pointer
158  *
159  * Get the current rptr from the hardware.
160  */
sdma_v7_1_ring_get_rptr(struct amdgpu_ring * ring)161 static uint64_t sdma_v7_1_ring_get_rptr(struct amdgpu_ring *ring)
162 {
163 	u64 *rptr;
164 
165 	/* XXX check if swapping is necessary on BE */
166 	rptr = (u64 *)ring->rptr_cpu_addr;
167 
168 	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
169 	return ((*rptr) >> 2);
170 }
171 
172 /**
173  * sdma_v7_1_ring_get_wptr - get the current write pointer
174  *
175  * @ring: amdgpu ring pointer
176  *
177  * Get the current wptr from the hardware.
178  */
sdma_v7_1_ring_get_wptr(struct amdgpu_ring * ring)179 static uint64_t sdma_v7_1_ring_get_wptr(struct amdgpu_ring *ring)
180 {
181 	u64 wptr = 0;
182 
183 	if (ring->use_doorbell) {
184 		/* XXX check if swapping is necessary on BE */
185 		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
186 		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
187 	}
188 
189 	return wptr >> 2;
190 }
191 
192 /**
193  * sdma_v7_1_ring_set_wptr - commit the write pointer
194  *
195  * @ring: amdgpu ring pointer
196  *
197  * Write the wptr back to the hardware.
198  */
sdma_v7_1_ring_set_wptr(struct amdgpu_ring * ring)199 static void sdma_v7_1_ring_set_wptr(struct amdgpu_ring *ring)
200 {
201 	struct amdgpu_device *adev = ring->adev;
202 
203 	DRM_DEBUG("Setting write pointer\n");
204 
205 	if (ring->use_doorbell) {
206 		DRM_DEBUG("Using doorbell -- "
207 			  "wptr_offs == 0x%08x "
208 			  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
209 			  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
210 			  ring->wptr_offs,
211 			  lower_32_bits(ring->wptr << 2),
212 			  upper_32_bits(ring->wptr << 2));
213 		/* XXX check if swapping is necessary on BE */
214 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
215 			     ring->wptr << 2);
216 		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
217 			  ring->doorbell_index, ring->wptr << 2);
218 		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
219 	} else {
220 		DRM_DEBUG("Not using doorbell -- "
221 			  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
222 			  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
223 			  ring->me,
224 			  lower_32_bits(ring->wptr << 2),
225 			  ring->me,
226 			  upper_32_bits(ring->wptr << 2));
227 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
228 							     ring->me,
229 							     regSDMA0_SDMA_QUEUE0_RB_WPTR),
230 				lower_32_bits(ring->wptr << 2));
231 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
232 							     ring->me,
233 							     regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
234 				upper_32_bits(ring->wptr << 2));
235 	}
236 }
237 
/* Pad the ring with @count NOP dwords; when burst NOPs are supported the
 * first NOP carries the burst length so the engine can skip the rest.
 */
static void sdma_v7_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	uint32_t n;

	if (!count)
		return;

	if (sdma && sdma->burst_nop)
		amdgpu_ring_write(ring, ring->funcs->nop |
				  SDMA_PKT_NOP_HEADER_COUNT(count - 1));
	else
		amdgpu_ring_write(ring, ring->funcs->nop);

	for (n = 1; n < count; n++)
		amdgpu_ring_write(ring, ring->funcs->nop);
}
250 
251 /**
252  * sdma_v7_1_ring_emit_ib - Schedule an IB on the DMA engine
253  *
254  * @ring: amdgpu ring pointer
255  * @job: job to retrieve vmid from
256  * @ib: IB object to schedule
257  * @flags: unused
258  *
259  * Schedule an IB in the DMA ring.
260  */
static void sdma_v7_1_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_job *job,
				   struct amdgpu_ib *ib,
				   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

	/* An IB packet must end on a 8 DW boundary--the next dword
	 * must be on a 8-dword boundary. Our IB packet below is 6
	 * dwords long, thus add x number of NOPs, such that, in
	 * modular arithmetic,
	 * wptr + 6 + x = 8k, k >= 0, which in C is,
	 * (wptr + 6 + x) % 8 = 0.
	 * The expression below, is a solution of x.
	 */
	sdma_v7_1_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	/* INDIRECT packet header carries the VMID the IB executes under */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	/* CSA address for context save/restore of this submission */
	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}
288 
289 /**
290  * sdma_v7_1_ring_emit_mem_sync - flush the IB by graphics cache rinse
291  *
292  * @ring: amdgpu ring pointer
293  *
294  * flush the IB by graphics cache rinse.
295  */
static void sdma_v7_1_ring_emit_mem_sync(struct amdgpu_ring *ring)
{
	/* invalidate GL1/GLV/GLK/GLI/GLM and invalidate+writeback GL2 */
	uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
		SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
		SDMA_GCR_GLI_INV(1);

	/* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
	/* base/limit VA fields are left at 0 — presumably a full-range
	 * request, matching the flush-everything comment above; confirm. */
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_56_32(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_0(gcr_cntl) |
			  SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_15_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_16(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD5_LIMIT_VA_56_48(0) |
			  SDMA_PKT_GCR_REQ_PAYLOAD5_VMID(0));
}
312 
313 
314 /**
315  * sdma_v7_1_ring_emit_fence - emit a fence on the DMA ring
316  *
317  * @ring: amdgpu ring pointer
318  * @addr: address
319  * @seq: fence seq number
320  * @flags: fence flags
321  *
322  * Add a DMA fence packet to the ring to write
323  * the fence seq number and DMA trap packet to generate
324  * an interrupt if needed.
325  */
static void sdma_v7_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		/* second fence packet stores the upper 32 bits of seq
		 * at addr + 4 */
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
				  SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* generate an interrupt */
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
	}
}
357 
358 /**
359  * sdma_v7_1_inst_gfx_stop - stop the gfx async dma engines
360  *
361  * @adev: amdgpu_device pointer
362  * @inst_mask: mask of dma engine instances to be disabled
363  *
364  * Stop the gfx async dma ring buffers.
365  */
static void sdma_v7_1_inst_gfx_stop(struct amdgpu_device *adev,
				    uint32_t inst_mask)
{
	int inst;

	for_each_inst(inst, inst_mask) {
		u32 reg, val;

		/* disable the ring buffer */
		reg = sdma_v7_1_get_reg_offset(adev, inst, regSDMA0_SDMA_QUEUE0_RB_CNTL);
		val = RREG32_SOC15_IP(GC, reg);
		val = REG_SET_FIELD(val, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 0);
		WREG32_SOC15_IP(GC, reg, val);

		/* disable indirect buffer fetches */
		reg = sdma_v7_1_get_reg_offset(adev, inst, regSDMA0_SDMA_QUEUE0_IB_CNTL);
		val = RREG32_SOC15_IP(GC, reg);
		val = REG_SET_FIELD(val, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 0);
		WREG32_SOC15_IP(GC, reg, val);
	}
}
381 
382 /**
383  * sdma_v7_1_inst_rlc_stop - stop the compute async dma engines
384  *
385  * @adev: amdgpu_device pointer
386  * @inst_mask: mask of dma engine instances to be disabled
387  *
388  * Stop the compute async dma queues.
389  */
/* Stub: compute (RLC) queue teardown is not implemented yet. */
static void sdma_v7_1_inst_rlc_stop(struct amdgpu_device *adev,
				    uint32_t inst_mask)
{
	/* XXX todo */
}
395 
396 /**
397  * sdma_v7_1_inst_ctx_switch_enable - stop the async dma engines context switch
398  *
399  * @adev: amdgpu_device pointer
400  * @enable: enable/disable the DMA MEs context switch.
401  * @inst_mask: mask of dma engine instances to be enabled
402  *
403  * Halt or unhalt the async dma engines context switch.
404  */
static void sdma_v7_1_inst_ctx_switch_enable(struct amdgpu_device *adev,
					     bool enable, uint32_t inst_mask)
{
	int i;

	/* NOTE(review): @enable is currently unused -- the UTCL1 timeout is
	 * programmed to 0x80 unconditionally for every selected instance;
	 * confirm whether enable/disable handling is still to be added.
	 */
	for_each_inst(i, inst_mask) {
		WREG32_SOC15_IP(GC,
			sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_TIMEOUT), 0x80);
	}
}
415 
416 /**
417  * sdma_v7_1_inst_enable - stop the async dma engines
418  *
419  * @adev: amdgpu_device pointer
420  * @enable: enable/disable the DMA MEs.
421  * @inst_mask: mask of dma engine instances to be enabled
422  *
423  * Halt or unhalt the async dma engines.
424  */
static void sdma_v7_1_inst_enable(struct amdgpu_device *adev,
				  bool enable, uint32_t inst_mask)
{
	u32 mcu_cntl;
	int i;

	/* quiesce the gfx and compute queues before halting */
	if (!enable) {
		sdma_v7_1_inst_gfx_stop(adev, inst_mask);
		sdma_v7_1_inst_rlc_stop(adev, inst_mask);
	}

	/* skip MCU halt programming under SRIOV -- presumably the host
	 * controls it; confirm */
	if (amdgpu_sriov_vf(adev))
		return;

	/* HALT=0 runs the engine, HALT=1 stops it */
	for_each_inst(i, inst_mask) {
		mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_SDMA_MCU_CNTL, HALT, enable ? 0 : 1);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), mcu_cntl);
	}
}
445 
446 /**
447  * sdma_v7_1_gfx_resume_instance - start/restart a certain sdma engine
448  *
449  * @adev: amdgpu_device pointer
450  * @i: instance
451  * @restore: used to restore wptr when restart
452  *
453  * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
454  * Return 0 for success.
455  */
static int sdma_v7_1_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 doorbell;
	u32 doorbell_offset;
	u32 temp;
	u64 wptr_gpu_addr;
	int r;

	ring = &adev->sdma.instance[i].ring;

	/* Set ring buffer size in dwords */
	rb_bufsz = order_base_2(ring->ring_size / 4);
	rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL,
				RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_PRIV, 1);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);

	/* Initialize the ring buffer's read and write pointers.  On restore
	 * both rptr and wptr are re-seeded from ring->wptr -- presumably to
	 * present the ring as fully drained; confirm. */
	if (restore) {
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
	} else {
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), 0);
	}
	/* setup the wptr shadow polling */
	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_LO),
	       lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_HI),
	       upper_32_bits(wptr_gpu_addr));

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_HI),
	       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_LO),
	       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
	/* SRIOV has no doorbell writes from the guest, so the engine must
	 * poll the wptr shadow instead */
	if (amdgpu_sriov_vf(adev))
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
	else
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);

	/* ring base is 256-byte aligned; registers take base >> 8 */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);

	if (!restore)
		ring->wptr = 0;

	/* before programing wptr to a less value, need set minor_ptr_update first */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 1);

	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
		/* NOTE(review): shift order here (lower_32_bits(wptr) << 2)
		 * differs from the restore path above (lower_32_bits(wptr << 2));
		 * identical when wptr is 0, but worth confirming for restore.
		 */
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
	}

	doorbell = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL));
	doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET));

	if (ring->use_doorbell) {
		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);
		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET,
				OFFSET, ring->doorbell_index);
	} else {
		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 0);
	}
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL), doorbell);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET), doorbell_offset);

	/* doorbell aperture range is programmed once, via instance 0 */
	if (i == 0)
		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
					      ring->doorbell_index,
					      adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);

	if (amdgpu_sriov_vf(adev))
		sdma_v7_1_ring_set_wptr(ring);

	/* set minor_ptr_update to 0 after wptr programed */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 0);

	/* Set up sdma hang watchdog */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL));
	/* 100ms per unit */
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
			     max(adev->usec_timeout/100000, 1));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL), temp);

	/* Set up RESP_MODE to non-copy addresses */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL));
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, RESP_MODE, 3);
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, REDO_DELAY, 9);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL), temp);

	/* program default cache read and write policy */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE));
	/* clean read policy and write policy bits */
	temp &= 0xFF0FFF;
	temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
		 (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE), temp);

	if (!amdgpu_sriov_vf(adev)) {
		/* unhalt engine */
		temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, HALT, 0);
		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, RESET, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), temp);
	}

	/* enable DMA RB */
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 1);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);

	ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
	/* enable DMA IBs */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
	ring->sched.ready = true;

	if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
		sdma_v7_1_inst_ctx_switch_enable(adev, true, i);
		sdma_v7_1_inst_enable(adev, true, i);
	}

	/* smoke-test the ring; mark it unusable if the test fails */
	r = amdgpu_ring_test_helper(ring);
	if (r)
		ring->sched.ready = false;

	return r;
}
605 
606 /**
607  * sdma_v7_1_inst_gfx_resume - setup and start the async dma engines
608  *
609  * @adev: amdgpu_device pointer
610  * @inst_mask: mask of dma engine instances to be enabled
611  *
612  * Set up the gfx DMA ring buffers and enable them.
613  * Returns 0 for success, error for failure.
614  */
static int sdma_v7_1_inst_gfx_resume(struct amdgpu_device *adev,
				     uint32_t inst_mask)
{
	int inst, r = 0;

	/* bring up each selected instance from scratch (no wptr restore),
	 * stopping at the first failure */
	for_each_inst(inst, inst_mask) {
		r = sdma_v7_1_gfx_resume_instance(adev, inst, false);
		if (r)
			break;
	}

	return r;
}
629 
630 /**
631  * sdma_v7_1_inst_rlc_resume - setup and start the async dma engines
632  *
633  * @adev: amdgpu_device pointer
634  * @inst_mask: mask of dma engine instances to be enabled
635  *
636  * Set up the compute DMA queues and enable them.
637  * Returns 0 for success, error for failure.
638  */
static int sdma_v7_1_inst_rlc_resume(struct amdgpu_device *adev,
				     uint32_t inst_mask)
{
	/* Stub: compute (RLC) queues are not brought up here yet; report
	 * success so the common start path can proceed. */
	return 0;
}
644 
/* Release the per-instance ucode BO together with its GPU address and CPU
 * mapping for every instance selected in @inst_mask.
 */
static void sdma_v7_1_inst_free_ucode_buffer(struct amdgpu_device *adev,
					     uint32_t inst_mask)
{
	int inst;

	for_each_inst(inst, inst_mask)
		amdgpu_bo_free_kernel(&adev->sdma.instance[inst].sdma_fw_obj,
				      &adev->sdma.instance[inst].sdma_fw_gpu_addr,
				      (void **)&adev->sdma.instance[inst].sdma_fw_ptr);
}
656 
657 /**
658  * sdma_v7_1_inst_load_microcode - load the sDMA ME ucode
659  *
660  * @adev: amdgpu_device pointer
661  * @inst_mask: mask of dma engine instances to be enabled
662  *
663  * Loads the sDMA0/1 ucode.
664  * Returns 0 for success, -EINVAL if the ucode is not available.
665  */
static int sdma_v7_1_inst_load_microcode(struct amdgpu_device *adev,
					 uint32_t inst_mask)
{
	const struct sdma_firmware_header_v3_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	uint32_t tmp, sdma_status, ic_op_cntl;
	int i, r, j;

	/* halt the MEs */
	sdma_v7_1_inst_enable(adev, false, inst_mask);

	if (!adev->sdma.instance[0].fw)
		return -EINVAL;

	/* all instances share the firmware image loaded for instance 0 */
	hdr = (const struct sdma_firmware_header_v3_0 *)
		adev->sdma.instance[0].fw->data;
	amdgpu_ucode_print_sdma_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
			le32_to_cpu(hdr->ucode_offset_bytes));
	fw_size = le32_to_cpu(hdr->ucode_size_bytes);

	for_each_inst(i, inst_mask) {
		/* stage the ucode in a VRAM BO the instruction cache can fetch */
		r = amdgpu_bo_create_reserved(adev, fw_size,
					      PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->sdma.instance[i].sdma_fw_obj,
					      &adev->sdma.instance[i].sdma_fw_gpu_addr,
					      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
			return r;
		}

		memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);

		amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
		amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);

		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_CNTL, GPA, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL), tmp);

		/* point the instruction cache at the staged ucode */
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_LO),
			lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_HI),
			upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));

		/* kick off icache priming */
		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL), tmp);

		/* Wait for sdma ucode init complete */
		for (j = 0; j < adev->usec_timeout; j++) {
			ic_op_cntl = RREG32_SOC15_IP(GC,
					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
			sdma_status = RREG32_SOC15_IP(GC,
					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
			if ((REG_GET_FIELD(ic_op_cntl, SDMA0_SDMA_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
			    (REG_GET_FIELD(sdma_status, SDMA0_SDMA_STATUS_REG, UCODE_INIT_DONE) == 1))
				break;
			udelay(1);
		}

		/* NOTE(review): on timeout the ucode BOs created so far are
		 * not freed here -- presumably the caller releases them via
		 * sdma_v7_1_inst_free_ucode_buffer on the error path; confirm.
		 */
		if (j >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to init sdma ucode\n");
			return -EINVAL;
		}
	}

	return 0;
}
739 
/* IP-level soft reset: halt every engine, pulse GRBM soft reset per
 * instance, then restart all engines.
 */
static int sdma_v7_1_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t inst_mask;
	u32 tmp;
	int i;

	/* NOTE(review): mask is built from NUM_XCC(sdma_mask), i.e. one bit
	 * per XCC rather than per SDMA instance -- confirm this is intended.
	 */
	inst_mask = GENMASK(NUM_XCC(adev->sdma.sdma_mask) - 1, 0);
	sdma_v7_1_inst_gfx_stop(adev, inst_mask);

	for_each_inst(i, inst_mask) {
		//tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE));
		//tmp |= SDMA0_SDMA_FREEZE__FREEZE_MASK;
		//WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE), tmp);
		/* halt and reset the MCU before pulsing GRBM soft reset */
		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		tmp |= SDMA0_SDMA_MCU_CNTL__HALT_MASK;
		tmp |= SDMA0_SDMA_MCU_CNTL__RESET_MASK;
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), tmp);

		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_PREEMPT), 0);

		udelay(100);

		/* assert soft reset for this SDMA instance; the readback
		 * presumably posts the write -- confirm */
		tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

		udelay(100);

		/* release soft reset */
		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

		udelay(100);
	}

	/* bring the engines back up */
	return sdma_v7_1_inst_start(adev, inst_mask);
}
777 
sdma_v7_1_check_soft_reset(struct amdgpu_ip_block * ip_block)778 static bool sdma_v7_1_check_soft_reset(struct amdgpu_ip_block *ip_block)
779 {
780 	struct amdgpu_device *adev = ip_block->adev;
781 	struct amdgpu_ring *ring;
782 	int i, r;
783 	long tmo = msecs_to_jiffies(1000);
784 
785 	for (i = 0; i < adev->sdma.num_instances; i++) {
786 		ring = &adev->sdma.instance[i].ring;
787 		r = amdgpu_ring_test_ib(ring, tmo);
788 		if (r)
789 			return true;
790 	}
791 
792 	return false;
793 }
794 
/**
 * sdma_v7_1_reset_queue - reset one SDMA gfx queue
 * @ring: ring backed by the queue to reset
 * @vmid: vmid of the job that timed out
 * @timedout_fence: fence of the timed-out job
 *
 * Resets the legacy queue through MES, resumes the gfx queue of
 * the ring's instance and completes the reset via the ring reset
 * helper. Returns 0 on success, negative errno on failure.
 */
static int sdma_v7_1_reset_queue(struct amdgpu_ring *ring,
				 unsigned int vmid,
				 struct amdgpu_fence *timedout_fence)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	/* ring->me is the logical SDMA instance index; reject bogus rings. */
	if (ring->me >= adev->sdma.num_instances) {
		dev_err(adev->dev, "sdma instance not found\n");
		return -EINVAL;
	}

	amdgpu_ring_reset_helper_begin(ring, timedout_fence);

	r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0);
	if (r)
		return r;

	/* Restore queue registers for this instance (restore = true). */
	r = sdma_v7_1_gfx_resume_instance(adev, ring->me, true);
	if (r)
		return r;

	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}
819 
/**
 * sdma_v7_1_inst_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the DMA engines and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
				uint32_t inst_mask)
{
	int r = 0;

	/* Under SR-IOV the host owns the engine; the guest only
	 * programs the ring-buffer registers.
	 */
	if (amdgpu_sriov_vf(adev)) {
		sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
		sdma_v7_1_inst_enable(adev, false, inst_mask);

		/* set RB registers */
		r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
		return r;
	}

	/* Direct firmware load: push the microcode ourselves; the
	 * ucode buffer is freed on failure since it won't be used.
	 */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = sdma_v7_1_inst_load_microcode(adev, inst_mask);
		if (r) {
			sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask);
			return r;
		}

		/* emulation is much slower; give the ucode time to settle */
		if (amdgpu_emu_mode == 1)
			msleep(1000);
	}

	/* unhalt the MEs */
	sdma_v7_1_inst_enable(adev, true, inst_mask);
	/* enable sdma ring preemption */
	sdma_v7_1_inst_ctx_switch_enable(adev, true, inst_mask);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
	if (r)
		return r;
	r = sdma_v7_1_inst_rlc_resume(adev, inst_mask);

	return r;
}
867 
/**
 * sdma_v7_1_mqd_init - initialize an SDMA memory queue descriptor
 * @adev: amdgpu_device pointer
 * @mqd: MQD backing memory to fill (struct v12_sdma_mqd)
 * @prop: queue properties (ring base/size, wptr/rptr addrs, doorbell)
 *
 * Populates the MQD fields from @prop so the scheduler firmware can
 * map the queue. Always returns 0.
 */
static int sdma_v7_1_mqd_init(struct amdgpu_device *adev, void *mqd,
			      struct amdgpu_mqd_prop *prop)
{
	struct v12_sdma_mqd *m = mqd;
	uint64_t wb_gpu_addr;

	/* RB_SIZE is log2 of the ring size in dwords; enable rptr
	 * writeback and MCU wptr polling.
	 */
	m->sdmax_rlcx_rb_cntl =
		order_base_2(prop->queue_size / 4) << SDMA0_SDMA_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		4 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;

	/* Ring base is stored as a 256-byte-aligned address (>> 8). */
	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);

	wb_gpu_addr = prop->wptr_gpu_addr;
	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);

	wb_gpu_addr = prop->rptr_gpu_addr;
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);

	/* Seed IB_CNTL from instance 0's current register value. */
	m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, 0,
							regSDMA0_SDMA_QUEUE0_IB_CNTL));

	m->sdmax_rlcx_doorbell_offset =
		prop->doorbell_index << SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);

	m->sdmax_rlcx_doorbell_log = 0;
	m->sdmax_rlcx_rb_aql_cntl = 0x4000;	/* regSDMA0_SDMA_QUEUE0_RB_AQL_CNTL_DEFAULT */
	m->sdmax_rlcx_dummy_reg = 0xf;	/* regSDMA0_SDMA_QUEUE0_DUMMY_REG_DEFAULT */

	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);

	return 0;
}
908 
sdma_v7_1_set_mqd_funcs(struct amdgpu_device * adev)909 static void sdma_v7_1_set_mqd_funcs(struct amdgpu_device *adev)
910 {
911 	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
912 	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_1_mqd_init;
913 }
914 
/**
 * sdma_v7_1_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	/* Known poison pattern that the engine must overwrite. */
	tmp = 0xCAFEDEAD;

	/* Grab a writeback slot the packet will target. */
	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_device_wb_free(adev, index);
		return r;
	}

	/* WRITE_LINEAR packet: one dword (count 0 == 1) of 0xDEADBEEF
	 * to the writeback slot.
	 */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* Poll the slot until the engine has written the pattern. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	amdgpu_device_wb_free(adev, index);

	return r;
}
976 
/**
 * sdma_v7_1_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring.
 * Returns 0 on success, error on failure.
 */
static int sdma_v7_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	long r;
	u32 tmp = 0;
	u64 gpu_addr;

	/* Poison pattern the IB must overwrite with 0xDEADBEEF. */
	tmp = 0xCAFEDEAD;
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err0;
	}

	/* WRITE_LINEAR of one dword, padded with NOPs to 8 dwords. */
	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	/* Wait for the IB's fence rather than polling memory. */
	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}

	tmp = le32_to_cpu(adev->wb.wb[index]);

	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}
1053 
1054 
1055 /**
1056  * sdma_v7_1_vm_copy_pte - update PTEs by copying them from the GART
1057  *
1058  * @ib: indirect buffer to fill with commands
1059  * @pe: addr of the page entry
1060  * @src: src addr to copy from
1061  * @count: number of page entries to update
1062  *
1063  * Update PTEs by copying them from the GART using sDMA.
1064  */
sdma_v7_1_vm_copy_pte(struct amdgpu_ib * ib,uint64_t pe,uint64_t src,unsigned count)1065 static void sdma_v7_1_vm_copy_pte(struct amdgpu_ib *ib,
1066 				  uint64_t pe, uint64_t src,
1067 				  unsigned count)
1068 {
1069 	unsigned bytes = count * 8;
1070 
1071 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
1072 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1073 
1074 	ib->ptr[ib->length_dw++] = bytes - 1;
1075 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1076 	ib->ptr[ib->length_dw++] = lower_32_bits(src);
1077 	ib->ptr[ib->length_dw++] = upper_32_bits(src);
1078 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1079 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1080 
1081 }
1082 
1083 /**
1084  * sdma_v7_1_vm_write_pte - update PTEs by writing them manually
1085  *
1086  * @ib: indirect buffer to fill with commands
1087  * @pe: addr of the page entry
1088  * @value: dst addr to write into pe
1089  * @count: number of page entries to update
1090  * @incr: increase next addr by incr bytes
1091  *
1092  * Update PTEs by writing them manually using sDMA.
1093  */
sdma_v7_1_vm_write_pte(struct amdgpu_ib * ib,uint64_t pe,uint64_t value,unsigned count,uint32_t incr)1094 static void sdma_v7_1_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1095 				   uint64_t value, unsigned count,
1096 				   uint32_t incr)
1097 {
1098 	unsigned ndw = count * 2;
1099 
1100 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
1101 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1102 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1103 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1104 	ib->ptr[ib->length_dw++] = ndw - 1;
1105 	for (; ndw > 0; ndw -= 2) {
1106 		ib->ptr[ib->length_dw++] = lower_32_bits(value);
1107 		ib->ptr[ib->length_dw++] = upper_32_bits(value);
1108 		value += incr;
1109 	}
1110 }
1111 
/**
 * sdma_v7_1_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA.
 */
static void sdma_v7_1_vm_set_pte_pde(struct amdgpu_ib *ib,
				     uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint64_t flags)
{
	/* for physically contiguous pages (vram) */
	u32 header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);

	/* TODO:
	 * When VM_L2_CNTL5.WALKER_FETCH_PDE_MTYPE_ENABLE is enabled, change below MTYPE
	 * to RW for AID A1 and UC for AID A0. NC needs additional GCR flush and need not
	 * be supported. Also, honour amdgpu_mtype_local override. RW would additionally
	 * require setting SCOPE bits in the header.
	 *
	 * header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2:RW) |
	 *           SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) |
	 *           SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3:SYS_SCOPE));
	 */

	/* VM_L2_CNTL5.WALKER_FETCH_PDE_MTYPE_ENABLE is 0 which defaults to UC. So,
	 * use MTYPE_UC (0x3). For ref. MTYPE_RW=0x2 MTYPE_NC=0x0
	 */
	header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3) | SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1);

	/* PTEPDE packet: dst, mask, start value, increment, count. */
	ib->ptr[ib->length_dw++] = header;
	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = incr; /* increment size */
	ib->ptr[ib->length_dw++] = 0;
	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
}
1159 
1160 /**
1161  * sdma_v7_1_ring_pad_ib - pad the IB
1162  *
1163  * @ring: amdgpu ring pointer
1164  * @ib: indirect buffer to fill with padding
1165  *
1166  * Pad the IB with NOPs to a boundary multiple of 8.
1167  */
sdma_v7_1_ring_pad_ib(struct amdgpu_ring * ring,struct amdgpu_ib * ib)1168 static void sdma_v7_1_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1169 {
1170 	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1171 	u32 pad_count;
1172 	int i;
1173 
1174 	pad_count = (-ib->length_dw) & 0x7;
1175 	for (i = 0; i < pad_count; i++)
1176 		if (sdma && sdma->burst_nop && (i == 0))
1177 			ib->ptr[ib->length_dw++] =
1178 				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
1179 				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1180 		else
1181 			ib->ptr[ib->length_dw++] =
1182 				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
1183 }
1184 
/**
 * sdma_v7_1_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (CIK).
 */
static void sdma_v7_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle: POLL_REGMEM on the fence memory until the
	 * latest emitted sequence number appears.
	 */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}
1208 
/**
 * sdma_v7_1_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA.
 */
static void sdma_v7_1_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	/* delegate to the common GMC TLB-flush packet emission */
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}
1224 
/**
 * sdma_v7_1_ring_emit_wreg - emit a register write on the ring
 * @ring: amdgpu_ring pointer
 * @reg: register dword offset to write
 * @val: value to write
 */
static void sdma_v7_1_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	/* SRBM WRITE command will not support on sdma v7.
	 * Use Register WRITE command instead, which OPCODE is same as SRBM WRITE
	 */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE));
	/* normalize per-XCC offset; packet wants a byte address (<< 2) */
	amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2);
	amdgpu_ring_write(ring, val);
}
1235 
/**
 * sdma_v7_1_ring_emit_reg_wait - emit a wait for a register value
 * @ring: amdgpu_ring pointer
 * @reg: register dword offset to poll
 * @val: reference value to wait for
 * @mask: bits of the register to compare
 */
static void sdma_v7_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	/* POLL_REGMEM in register mode: (reg & mask) == val */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
	amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}
1248 
/**
 * sdma_v7_1_ring_emit_reg_write_reg_wait - write one register, wait on another
 * @ring: amdgpu_ring pointer
 * @reg0: register to write @ref to
 * @reg1: register to poll until (reg1 & @mask) == @mask
 * @ref: value written to @reg0
 * @mask: compare mask for @reg1
 */
static void sdma_v7_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	/* wait for a cycle to reset vm_inv_eng*_ack */
	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
1258 
/* VM page-table update callbacks backed by the SDMA engine. */
static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = {
	.copy_pte_num_dw = 8,
	.copy_pte = sdma_v7_1_vm_copy_pte,
	.write_pte = sdma_v7_1_vm_write_pte,
	.set_pte_pde = sdma_v7_1_vm_set_pte_pde,
};
1265 
/**
 * sdma_v7_1_early_init - early SDMA IP initialization
 * @ip_block: SDMA IP block
 *
 * Applies the user-queue module parameter, loads the SDMA microcode
 * and installs the ring/buffer/pte/irq/mqd function tables.
 * Returns 0 on success, negative errno on firmware load failure.
 */
static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	/* amdgpu_user_queue: -1 = auto (user queues disabled here),
	 * 0 = kernel queues with user submission allowed.
	 */
	switch (amdgpu_user_queue) {
	case -1:
	default:
		adev->sdma.no_user_submission = true;
		adev->sdma.disable_uq = true;
		break;
	case 0:
		adev->sdma.no_user_submission = false;
		adev->sdma.disable_uq = true;
		break;
	}

	r = amdgpu_sdma_init_microcode(adev, 0, true);
	if (r) {
		DRM_ERROR("Failed to init sdma firmware!\n");
		return r;
	}

	sdma_v7_1_set_ring_funcs(adev);
	sdma_v7_1_set_buffer_funcs(adev);
	amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v7_1_vm_pte_funcs);
	sdma_v7_1_set_irq_funcs(adev);
	sdma_v7_1_set_mqd_funcs(adev);

	return 0;
}
1297 
/**
 * sdma_v7_1_sw_init - software init for the SDMA IP
 * @ip_block: SDMA IP block
 *
 * Registers the trap IRQ source, initializes one ring per SDMA
 * instance, sets up the supported-reset mask/sysfs, allocates the
 * IP-dump buffer and, when enabled, the user-queue functions.
 * Returns 0 on success, negative errno on failure.
 */
static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
	uint32_t *ptr;
	u32 xcc_id;

	/* SDMA trap event */
	r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GFX,
			      GFX_12_1_0__SRCID__SDMA_TRAP,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->me = i;

		/* Find the logical XCC this instance was assigned to. */
		for (xcc_id = 0; xcc_id < fls(adev->gfx.xcc_mask); xcc_id++) {
			if (adev->sdma.instance[i].xcc_id == GET_INST(GC, xcc_id))
				break;
		}

		DRM_DEBUG("SDMA%d.%d use_doorbell being set to: [%s]\n",
				xcc_id, GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc,
				ring->use_doorbell?"true":"false");

		ring->doorbell_index =
			(adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset

		ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
		/* ring name encodes xcc and per-xcc instance, e.g. "sdma0.1" */
		sprintf(ring->name, "sdma%d.%d", xcc_id,
				GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;
	}

	/* Per-queue reset is only offered on bare metal and when ring
	 * resets are not disabled for debugging.
	 */
	adev->sdma.supported_reset =
		amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
	if (!amdgpu_sriov_vf(adev) &&
	    !adev->debug_disable_gpu_ring_reset)
		adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;

	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
	if (r)
		return r;
	/* Allocate memory for SDMA IP Dump buffer */
	ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
	if (ptr)
		adev->sdma.ip_dump = ptr;
	else
		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");

#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
#endif

	return r;
}
1365 
/**
 * sdma_v7_1_sw_fini - software teardown for the SDMA IP
 * @ip_block: SDMA IP block
 *
 * Releases rings, sysfs entries, firmware context, the direct-load
 * ucode buffer and the IP-dump buffer. Always returns 0.
 */
static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	amdgpu_sdma_sysfs_reset_mask_fini(adev);
	amdgpu_sdma_destroy_inst_ctx(adev, true);

	/* The ucode bo only exists for direct firmware loading. */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
		sdma_v7_1_inst_free_ucode_buffer(adev, adev->sdma.sdma_mask);

	kfree(adev->sdma.ip_dump);

	return 0;
}
1384 
/**
 * sdma_v7_1_hw_init - hardware init for the SDMA IP
 * @ip_block: SDMA IP block
 *
 * Starts every SDMA instance. Returns the result of
 * sdma_v7_1_inst_start().
 */
static int sdma_v7_1_hw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t inst_mask;

	/* Mask with one bit per logical instance. */
	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);

	return sdma_v7_1_inst_start(adev, inst_mask);
}
1394 
/**
 * sdma_v7_1_hw_fini - hardware teardown for the SDMA IP
 * @ip_block: SDMA IP block
 *
 * Disables context switching and halts all instances. Skipped under
 * SR-IOV, where the host owns the engine. Always returns 0.
 */
static int sdma_v7_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	if (amdgpu_sriov_vf(adev))
		return 0;

	sdma_v7_1_inst_ctx_switch_enable(adev, false, adev->sdma.sdma_mask);
	sdma_v7_1_inst_enable(adev, false, adev->sdma.sdma_mask);

	return 0;
}
1407 
/* Suspend is identical to hw_fini: halt the engines. */
static int sdma_v7_1_suspend(struct amdgpu_ip_block *ip_block)
{
	return sdma_v7_1_hw_fini(ip_block);
}
1412 
/* Resume is identical to hw_init: restart the engines. */
static int sdma_v7_1_resume(struct amdgpu_ip_block *ip_block)
{
	return sdma_v7_1_hw_init(ip_block);
}
1417 
sdma_v7_1_is_idle(struct amdgpu_ip_block * ip_block)1418 static bool sdma_v7_1_is_idle(struct amdgpu_ip_block *ip_block)
1419 {
1420 	struct amdgpu_device *adev = ip_block->adev;
1421 	u32 i;
1422 
1423 	for (i = 0; i < adev->sdma.num_instances; i++) {
1424 		u32 tmp = RREG32(sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
1425 
1426 		if (!(tmp & SDMA0_SDMA_STATUS_REG__IDLE_MASK))
1427 			return false;
1428 	}
1429 
1430 	return true;
1431 }
1432 
/**
 * sdma_v7_1_wait_for_idle - busy-wait until all SDMA instances are idle
 * @ip_block: SDMA IP block
 *
 * Polls each instance's status register up to adev->usec_timeout
 * microseconds. Returns 0 when all instances report IDLE in the same
 * pass, -ETIMEDOUT otherwise.
 */
static int sdma_v7_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	unsigned i, j;
	u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
	struct amdgpu_device *adev = ip_block->adev;

	for (i = 0; i < adev->usec_timeout; i++) {
		for (j = 0; j < adev->sdma.num_instances; j++) {
			sdma[j] = RREG32(sdma_v7_1_get_reg_offset(adev,
						j, regSDMA0_SDMA_STATUS_REG));
			/* stop scanning at the first busy instance */
			if (!(sdma[j] & SDMA0_SDMA_STATUS_REG__IDLE_MASK))
				break;
		}
		/* inner loop completed => every instance was idle */
		if (j == adev->sdma.num_instances)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
1452 
/**
 * sdma_v7_1_ring_preempt_ib - preempt the IB running on an SDMA ring
 * @ring: amdgpu_ring pointer
 *
 * Emits a trailing fence, asserts the QUEUE0_PREEMPT register and
 * polls for the trailing fence to signal, then deasserts preemption.
 * Returns 0 on success, negative errno on failure or timeout.
 */
static int sdma_v7_1_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	u32 index = 0;
	u64 sdma_gfx_preempt;

	amdgpu_sdma_get_index_from_ring(ring, &index);
	sdma_gfx_preempt =
		sdma_v7_1_get_reg_offset(adev, index, regSDMA0_SDMA_QUEUE0_PREEMPT);

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* emit the trailing fence */
	ring->trail_seq += 1;
	r = amdgpu_ring_alloc(ring, 10);
	if (r) {
		DRM_ERROR("ring %d failed to be allocated \n", ring->idx);
		return r;
	}
	sdma_v7_1_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
				  ring->trail_seq, 0);
	amdgpu_ring_commit(ring);

	/* assert IB preemption */
	WREG32(sdma_gfx_preempt, 1);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
	}

	/* deassert IB preemption */
	WREG32(sdma_gfx_preempt, 0);

	/* deassert the preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}
1501 
/**
 * sdma_v7_1_set_trap_irq_state - enable/disable SDMA trap interrupts
 * @adev: amdgpu_device pointer
 * @source: irq source (unused)
 * @type: SDMA instance index the state applies to
 * @state: AMDGPU_IRQ_STATE_ENABLE or _DISABLE
 *
 * Flips TRAP_ENABLE in the instance's SDMA_CNTL register.
 * Always returns 0.
 */
static int sdma_v7_1_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	u32 reg_offset = sdma_v7_1_get_reg_offset(adev, type, regSDMA0_SDMA_CNTL);

	/* read-modify-write TRAP_ENABLE only */
	sdma_cntl = RREG32(reg_offset);
	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_SDMA_CNTL, TRAP_ENABLE,
		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
	WREG32(reg_offset, sdma_cntl);

	return 0;
}
1518 
/**
 * sdma_v7_1_process_trap_irq - handle an SDMA trap interrupt
 * @adev: amdgpu_device pointer
 * @source: irq source (unused)
 * @entry: interrupt vector entry
 *
 * Decodes the ring id/node id into a logical SDMA instance and runs
 * fence processing on its ring for queue 0. Returns 0 on success,
 * -EINVAL if the instance cannot be resolved.
 */
static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	int inst, instances, queue, xcc_id = 0;

	DRM_DEBUG("IH: SDMA trap\n");

	/* MES-flagged fences must not reach this legacy path. */
	if (drm_WARN_ON_ONCE(&adev->ddev,
			     adev->enable_mes &&
			     (entry->src_data[0] & AMDGPU_FENCE_MES_QUEUE_FLAG)))
		return 0;

	/* ring_id layout: low nibble = queue, next nibble = instance
	 * within the XCC identified by node_id.
	 */
	queue = entry->ring_id & 0xf;
	if (adev->gfx.funcs && adev->gfx.funcs->ih_node_to_logical_xcc)
		xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, entry->node_id);
	else
		dev_warn(adev->dev, "IH: SDMA may get wrong xcc id as gfx function not available\n");
	inst = ((entry->ring_id & 0xf0) >> 4) +
		GET_INST(GC, xcc_id) * adev->sdma.num_inst_per_xcc;
	/* map the physical instance back to a logical index */
	for (instances = 0; instances < adev->sdma.num_instances; instances++) {
		if (inst == GET_INST(SDMA0, instances))
			break;
	}
	if (instances > adev->sdma.num_instances - 1) {
		DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
		return -EINVAL;
	}

	switch (entry->client_id) {
	case SOC_V1_0_IH_CLIENTID_GFX:
		switch (queue) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[instances].ring);
			break;
		default:
			break;
		}
		break;
	}
	return 0;
}
1561 
/* Illegal-instruction interrupt handler: intentionally a no-op stub. */
static int sdma_v7_1_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	return 0;
}
1568 
/* Clockgating control: not implemented for SDMA v7.1, always succeeds. */
static int sdma_v7_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					   enum amd_clockgating_state state)
{
	return 0;
}
1574 
/* Powergating control: not implemented for SDMA v7.1, always succeeds. */
static int sdma_v7_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
					  enum amd_powergating_state state)
{
	return 0;
}
1580 
/* No clockgating flags are reported for SDMA v7.1. */
static void sdma_v7_1_get_clockgating_state(struct amdgpu_ip_block *ip_block,
					    u64 *flags)
{
}
1585 
/**
 * sdma_v7_1_print_ip_state - print the captured SDMA register dump
 * @ip_block: SDMA IP block
 * @p: drm printer to emit into
 *
 * Prints the per-instance register snapshot previously captured by
 * sdma_v7_1_dump_ip_state(). No-op if no dump buffer was allocated.
 */
static void sdma_v7_1_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i, j;
	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
	uint32_t instance_offset;

	if (!adev->sdma.ip_dump)
		return;

	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
	for (i = 0; i < adev->sdma.num_instances; i++) {
		/* dump buffer is laid out as [instance][register] */
		instance_offset = i * reg_count;
		drm_printf(p, "\nInstance:%d\n", i);

		for (j = 0; j < reg_count; j++)
			drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_1[j].reg_name,
				   adev->sdma.ip_dump[instance_offset + j]);
	}
}
1606 
/**
 * sdma_v7_1_dump_ip_state - snapshot SDMA registers into the dump buffer
 * @ip_block: SDMA IP block
 *
 * Reads every register in sdma_reg_list_7_1 for each instance into
 * adev->sdma.ip_dump. GFXOFF is disabled around the reads so the
 * registers are accessible. No-op if no dump buffer was allocated.
 */
static void sdma_v7_1_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i, j;
	uint32_t instance_offset;
	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);

	if (!adev->sdma.ip_dump)
		return;

	/* keep GFX powered up while touching its register space */
	amdgpu_gfx_off_ctrl(adev, false);
	for (i = 0; i < adev->sdma.num_instances; i++) {
		instance_offset = i * reg_count;
		for (j = 0; j < reg_count; j++)
			adev->sdma.ip_dump[instance_offset + j] =
				RREG32(sdma_v7_1_get_reg_offset(adev, i,
				       sdma_reg_list_7_1[j].reg_offset));
	}
	amdgpu_gfx_off_ctrl(adev, true);
}
1627 
/* IP block lifecycle callbacks for the SDMA v7.1 engine. */
const struct amd_ip_funcs sdma_v7_1_ip_funcs = {
	.name = "sdma_v7_1",
	.early_init = sdma_v7_1_early_init,
	.late_init = NULL,
	.sw_init = sdma_v7_1_sw_init,
	.sw_fini = sdma_v7_1_sw_fini,
	.hw_init = sdma_v7_1_hw_init,
	.hw_fini = sdma_v7_1_hw_fini,
	.suspend = sdma_v7_1_suspend,
	.resume = sdma_v7_1_resume,
	.is_idle = sdma_v7_1_is_idle,
	.wait_for_idle = sdma_v7_1_wait_for_idle,
	.soft_reset = sdma_v7_1_soft_reset,
	.check_soft_reset = sdma_v7_1_check_soft_reset,
	.set_clockgating_state = sdma_v7_1_set_clockgating_state,
	.set_powergating_state = sdma_v7_1_set_powergating_state,
	.get_clockgating_state = sdma_v7_1_get_clockgating_state,
	.dump_ip_state = sdma_v7_1_dump_ip_state,
	.print_ip_state = sdma_v7_1_print_ip_state,
};
1648 
/* Ring callbacks for the SDMA v7.1 gfx queues. */
static const struct amdgpu_ring_funcs sdma_v7_1_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = sdma_v7_1_ring_get_rptr,
	.get_wptr = sdma_v7_1_ring_get_wptr,
	.set_wptr = sdma_v7_1_ring_set_wptr,
	.emit_frame_size =
		5 + /* sdma_v7_1_ring_init_cond_exec */
		6 + /* sdma_v7_1_ring_emit_pipeline_sync */
		/* sdma_v7_1_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
		10 + 10 + 10, /* sdma_v7_1_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 5 + 7 + 6, /* sdma_v7_1_ring_emit_ib */
	.emit_ib = sdma_v7_1_ring_emit_ib,
	.emit_mem_sync = sdma_v7_1_ring_emit_mem_sync,
	.emit_fence = sdma_v7_1_ring_emit_fence,
	.emit_pipeline_sync = sdma_v7_1_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v7_1_ring_emit_vm_flush,
	.test_ring = sdma_v7_1_ring_test_ring,
	.test_ib = sdma_v7_1_ring_test_ib,
	.insert_nop = sdma_v7_1_ring_insert_nop,
	.pad_ib = sdma_v7_1_ring_pad_ib,
	.emit_wreg = sdma_v7_1_ring_emit_wreg,
	.emit_reg_wait = sdma_v7_1_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = sdma_v7_1_ring_emit_reg_write_reg_wait,
	.init_cond_exec = sdma_v7_1_ring_init_cond_exec,
	.preempt_ib = sdma_v7_1_ring_preempt_ib,
	.reset = sdma_v7_1_reset_queue,
};
1682 
sdma_v7_1_set_ring_funcs(struct amdgpu_device * adev)1683 static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev)
1684 {
1685 	int i, dev_inst;
1686 
1687 	for (i = 0; i < adev->sdma.num_instances; i++) {
1688 		adev->sdma.instance[i].ring.funcs = &sdma_v7_1_ring_funcs;
1689 		adev->sdma.instance[i].ring.me = i;
1690 
1691 		dev_inst = GET_INST(SDMA0, i);
1692 		/* XCC to which SDMA belongs depends on physical instance */
1693 		adev->sdma.instance[i].xcc_id =
1694 			dev_inst / adev->sdma.num_inst_per_xcc;
1695 	}
1696 }
1697 
/* Trap (ring completion) interrupt: per-instance enable plus handler. */
static const struct amdgpu_irq_src_funcs sdma_v7_1_trap_irq_funcs = {
	.set = sdma_v7_1_set_trap_irq_state,
	.process = sdma_v7_1_process_trap_irq,
};
1702 
/* Illegal-instruction interrupt: report-only, no enable/disable control. */
static const struct amdgpu_irq_src_funcs sdma_v7_1_illegal_inst_irq_funcs = {
	.process = sdma_v7_1_process_illegal_inst_irq,
};
1706 
sdma_v7_1_set_irq_funcs(struct amdgpu_device * adev)1707 static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev)
1708 {
1709 	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
1710 					adev->sdma.num_instances;
1711 	adev->sdma.trap_irq.funcs = &sdma_v7_1_trap_irq_funcs;
1712 	adev->sdma.illegal_inst_irq.funcs = &sdma_v7_1_illegal_inst_irq_funcs;
1713 }
1714 
1715 /**
1716  * sdma_v7_1_emit_copy_buffer - copy buffer using the sDMA engine
1717  *
1718  * @ib: indirect buffer to fill with commands
1719  * @src_offset: src GPU address
1720  * @dst_offset: dst GPU address
1721  * @byte_count: number of bytes to xfer
1722  * @copy_flags: copy flags for the buffers
1723  *
1724  * Copy GPU buffers using the DMA engine.
1725  * Used by the amdgpu ttm implementation to move pages if
1726  * registered as the asic copy callback.
1727  */
sdma_v7_1_emit_copy_buffer(struct amdgpu_ib * ib,uint64_t src_offset,uint64_t dst_offset,uint32_t byte_count,uint32_t copy_flags)1728 static void sdma_v7_1_emit_copy_buffer(struct amdgpu_ib *ib,
1729 				       uint64_t src_offset,
1730 				       uint64_t dst_offset,
1731 				       uint32_t byte_count,
1732 				       uint32_t copy_flags)
1733 {
1734 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
1735 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
1736 		SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
1737 
1738 	ib->ptr[ib->length_dw++] = byte_count - 1;
1739 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1740 	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1741 	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1742 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1743 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1744 }
1745 
1746 /**
1747  * sdma_v7_1_emit_fill_buffer - fill buffer using the sDMA engine
1748  *
1749  * @ib: indirect buffer to fill
1750  * @src_data: value to write to buffer
1751  * @dst_offset: dst GPU address
1752  * @byte_count: number of bytes to xfer
1753  *
1754  * Fill GPU buffers using the DMA engine.
1755  */
sdma_v7_1_emit_fill_buffer(struct amdgpu_ib * ib,uint32_t src_data,uint64_t dst_offset,uint32_t byte_count)1756 static void sdma_v7_1_emit_fill_buffer(struct amdgpu_ib *ib,
1757 				       uint32_t src_data,
1758 				       uint64_t dst_offset,
1759 				       uint32_t byte_count)
1760 {
1761 	ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL);
1762 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1763 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1764 	ib->ptr[ib->length_dw++] = src_data;
1765 	ib->ptr[ib->length_dw++] = byte_count - 1;
1766 }
1767 
/*
 * TTM buffer-move callbacks.  *_max_bytes caps a single packet's transfer
 * size; *_num_dw is the dword budget reserved per emitted packet.
 */
static const struct amdgpu_buffer_funcs sdma_v7_1_buffer_funcs = {
	.copy_max_bytes = 1 << 30,
	.copy_num_dw = 8,
	.emit_copy_buffer = sdma_v7_1_emit_copy_buffer,
	.fill_max_bytes = 1 << 30,
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v7_1_emit_fill_buffer,
};
1776 
/**
 * sdma_v7_1_set_buffer_funcs - register SDMA as the TTM buffer mover
 *
 * @adev: amdgpu device pointer
 *
 * Routes TTM buffer copies and fills to SDMA instance 0's kernel ring.
 */
static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev)
{
	adev->mman.buffer_funcs = &sdma_v7_1_buffer_funcs;
	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
1782 
/* IP block descriptor used by the SoC discovery code to add SDMA v7.1. */
const struct amdgpu_ip_block_version sdma_v7_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 7,
	.minor = 1,
	.rev = 0,
	.funcs = &sdma_v7_1_ip_funcs,
};
1790 
/**
 * sdma_v7_1_xcp_resume - resume the SDMA instances of one partition
 *
 * @handle: amdgpu device pointer (passed as void * by the XCP layer)
 * @inst_mask: bitmask of SDMA instances belonging to the partition
 *
 * Returns 0 on success, negative error code on failure.
 */
static int sdma_v7_1_xcp_resume(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = handle;

	return sdma_v7_1_inst_start(adev, inst_mask);
}
1800 
/**
 * sdma_v7_1_xcp_suspend - quiesce the SDMA instances of one partition
 *
 * @handle: amdgpu device pointer (passed as void * by the XCP layer)
 * @inst_mask: bitmask of SDMA instances belonging to the partition
 *
 * Disables context switching first, then halts the engines.
 * Returns 0 (cannot fail).
 */
static int sdma_v7_1_xcp_suspend(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = handle;

	sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
	sdma_v7_1_inst_enable(adev, false, inst_mask);

	return 0;
}
1810 
/* Per-partition (XCP) suspend/resume hooks; non-static for the XCP core. */
struct amdgpu_xcp_ip_funcs sdma_v7_1_xcp_funcs = {
	.suspend = &sdma_v7_1_xcp_suspend,
	.resume = &sdma_v7_1_xcp_resume
};
1815