xref: /linux/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c (revision def3488eb0fdb386044aced1a8fb2592b1e68896)
1 /*
2  * Copyright 2025 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 
29 #include "amdgpu.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_trace.h"
32 
33 #include "gc/gc_12_1_0_offset.h"
34 #include "gc/gc_12_1_0_sh_mask.h"
35 #include "hdp/hdp_6_0_0_offset.h"
36 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
37 
38 #include "soc15_common.h"
39 #include "soc15.h"
40 #include "sdma_v7_1_0_pkt_open.h"
41 #include "nbio_v4_3.h"
42 #include "sdma_common.h"
43 #include "sdma_v7_1.h"
44 #include "v12_structs.h"
45 #include "mes_userqueue.h"
46 
47 MODULE_FIRMWARE("amdgpu/sdma_7_1_0.bin");
48 
/*
 * Constants used by sdma_v7_1_get_reg_offset() to relocate an SDMA0-relative
 * register offset to another SDMA instance of the same XCC.
 */
/* per-instance stride applied to offsets below SDMA0_SDMA_IDX_0_END */
#define SDMA1_REG_OFFSET 0x600
/* offsets at or above this value are resolved against GC base index 1 */
#define SDMA0_SDMA_IDX_0_END 0x450
/* per-instance stride applied to offsets at or above SDMA0_SDMA_IDX_0_END */
#define SDMA1_HYP_DEC_REG_OFFSET 0x30
52 
/*
 * Table of GC-block register entries for SDMA v7.1: engine status, UTCL1
 * status, the ring/IB state of QUEUE0..QUEUE2, interrupt/VM/chicken bits and
 * GRBM_STATUS2. Every entry is built with SOC15_REG_ENTRY_STR() for
 * instance 0.
 *
 * NOTE(review): presumably the register set captured for IP state dumps;
 * the consumer of this table is outside this view - confirm.
 */
static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_1[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS1_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS2_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS3_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS4_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS5_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS6_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UCODE_REV),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK1),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK1),
	/* QUEUE0 ring buffer / indirect buffer state */
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE_STATUS0),
	/* QUEUE1 ring buffer / indirect buffer state */
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_DUMMY_REG),
	/* QUEUE2 ring buffer / indirect buffer state */
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_INT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_VM_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_CHICKEN_BITS),
};
110 
/*
 * Forward declarations of static helpers that are referenced before their
 * definitions (e.g. sdma_v7_1_inst_start() is called from
 * sdma_v7_1_soft_reset()).
 */
static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
				uint32_t inst_mask);
117 
118 static u32 sdma_v7_1_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
119 {
120 	u32 base;
121 	u32 dev_inst = GET_INST(SDMA0, instance);
122 	int xcc_id = adev->sdma.instance[instance].xcc_id;
123 	int xcc_inst = dev_inst % adev->sdma.num_inst_per_xcc;
124 
125 	if (internal_offset >= SDMA0_SDMA_IDX_0_END) {
126 		base = adev->reg_offset[GC_HWIP][xcc_id][1];
127 		if (xcc_inst != 0)
128 			internal_offset += SDMA1_HYP_DEC_REG_OFFSET * xcc_inst;
129 	} else {
130 		base = adev->reg_offset[GC_HWIP][xcc_id][0];
131 		if (xcc_inst != 0)
132 			internal_offset += SDMA1_REG_OFFSET * xcc_inst;
133 	}
134 
135 	return base + internal_offset;
136 }
137 
138 static unsigned sdma_v7_1_ring_init_cond_exec(struct amdgpu_ring *ring,
139 					      uint64_t addr)
140 {
141 	unsigned ret;
142 
143 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
144 	amdgpu_ring_write(ring, lower_32_bits(addr));
145 	amdgpu_ring_write(ring, upper_32_bits(addr));
146 	amdgpu_ring_write(ring, 1);
147 	/* this is the offset we need patch later */
148 	ret = ring->wptr & ring->buf_mask;
149 	/* insert dummy here and patch it later */
150 	amdgpu_ring_write(ring, 0);
151 
152 	return ret;
153 }
154 
155 /**
156  * sdma_v7_1_ring_get_rptr - get the current read pointer
157  *
158  * @ring: amdgpu ring pointer
159  *
160  * Get the current rptr from the hardware.
161  */
162 static uint64_t sdma_v7_1_ring_get_rptr(struct amdgpu_ring *ring)
163 {
164 	u64 *rptr;
165 
166 	/* XXX check if swapping is necessary on BE */
167 	rptr = (u64 *)ring->rptr_cpu_addr;
168 
169 	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
170 	return ((*rptr) >> 2);
171 }
172 
173 /**
174  * sdma_v7_1_ring_get_wptr - get the current write pointer
175  *
176  * @ring: amdgpu ring pointer
177  *
178  * Get the current wptr from the hardware.
179  */
180 static uint64_t sdma_v7_1_ring_get_wptr(struct amdgpu_ring *ring)
181 {
182 	u64 wptr = 0;
183 
184 	if (ring->use_doorbell) {
185 		/* XXX check if swapping is necessary on BE */
186 		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
187 		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
188 	}
189 
190 	return wptr >> 2;
191 }
192 
193 /**
194  * sdma_v7_1_ring_set_wptr - commit the write pointer
195  *
196  * @ring: amdgpu ring pointer
197  *
198  * Write the wptr back to the hardware.
199  */
200 static void sdma_v7_1_ring_set_wptr(struct amdgpu_ring *ring)
201 {
202 	struct amdgpu_device *adev = ring->adev;
203 
204 	DRM_DEBUG("Setting write pointer\n");
205 
206 	if (ring->use_doorbell) {
207 		DRM_DEBUG("Using doorbell -- "
208 			  "wptr_offs == 0x%08x "
209 			  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
210 			  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
211 			  ring->wptr_offs,
212 			  lower_32_bits(ring->wptr << 2),
213 			  upper_32_bits(ring->wptr << 2));
214 		/* XXX check if swapping is necessary on BE */
215 		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
216 			     ring->wptr << 2);
217 		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
218 			  ring->doorbell_index, ring->wptr << 2);
219 		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
220 	} else {
221 		DRM_DEBUG("Not using doorbell -- "
222 			  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
223 			  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
224 			  ring->me,
225 			  lower_32_bits(ring->wptr << 2),
226 			  ring->me,
227 			  upper_32_bits(ring->wptr << 2));
228 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
229 							     ring->me,
230 							     regSDMA0_SDMA_QUEUE0_RB_WPTR),
231 				lower_32_bits(ring->wptr << 2));
232 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
233 							     ring->me,
234 							     regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
235 				upper_32_bits(ring->wptr << 2));
236 	}
237 }
238 
239 static void sdma_v7_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
240 {
241 	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
242 	int i;
243 
244 	for (i = 0; i < count; i++)
245 		if (sdma && sdma->burst_nop && (i == 0))
246 			amdgpu_ring_write(ring, ring->funcs->nop |
247 				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
248 		else
249 			amdgpu_ring_write(ring, ring->funcs->nop);
250 }
251 
252 /**
253  * sdma_v7_1_ring_emit_ib - Schedule an IB on the DMA engine
254  *
255  * @ring: amdgpu ring pointer
256  * @job: job to retrieve vmid from
257  * @ib: IB object to schedule
258  * @flags: unused
259  *
260  * Schedule an IB in the DMA ring.
261  */
262 static void sdma_v7_1_ring_emit_ib(struct amdgpu_ring *ring,
263 				   struct amdgpu_job *job,
264 				   struct amdgpu_ib *ib,
265 				   uint32_t flags)
266 {
267 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
268 	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
269 
270 	/* An IB packet must end on a 8 DW boundary--the next dword
271 	 * must be on a 8-dword boundary. Our IB packet below is 6
272 	 * dwords long, thus add x number of NOPs, such that, in
273 	 * modular arithmetic,
274 	 * wptr + 6 + x = 8k, k >= 0, which in C is,
275 	 * (wptr + 6 + x) % 8 = 0.
276 	 * The expression below, is a solution of x.
277 	 */
278 	sdma_v7_1_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
279 
280 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
281 			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
282 	/* base must be 32 byte aligned */
283 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
284 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
285 	amdgpu_ring_write(ring, ib->length_dw);
286 	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
287 	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
288 }
289 
290 /**
291  * sdma_v7_1_ring_emit_mem_sync - flush the IB by graphics cache rinse
292  *
293  * @ring: amdgpu ring pointer
294  *
295  * flush the IB by graphics cache rinse.
296  */
297 static void sdma_v7_1_ring_emit_mem_sync(struct amdgpu_ring *ring)
298 {
299 	uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
300 		SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
301 		SDMA_GCR_GLI_INV(1);
302 
303 	/* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
304 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
305 	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
306 	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_56_32(0));
307 	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_0(gcr_cntl) |
308 			  SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_15_7(0));
309 	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_16(0));
310 	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD5_LIMIT_VA_56_48(0) |
311 			  SDMA_PKT_GCR_REQ_PAYLOAD5_VMID(0));
312 }
313 
314 
315 /**
316  * sdma_v7_1_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
317  *
318  * @ring: amdgpu ring pointer
319  *
320  * Emit an hdp flush packet on the requested DMA ring.
321  */
322 static void sdma_v7_1_ring_emit_hdp_flush(struct amdgpu_ring *ring)
323 {
324 	struct amdgpu_device *adev = ring->adev;
325 	u32 ref_and_mask = 0;
326 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
327 
328 	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0
329 				<< (ring->me % adev->sdma.num_inst_per_xcc);
330 
331 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
332 			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
333 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
334 	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
335 	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
336 	amdgpu_ring_write(ring, ref_and_mask); /* reference */
337 	amdgpu_ring_write(ring, ref_and_mask); /* mask */
338 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
339 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
340 }
341 
342 /**
343  * sdma_v7_1_ring_emit_fence - emit a fence on the DMA ring
344  *
345  * @ring: amdgpu ring pointer
346  * @addr: address
347  * @seq: fence seq number
348  * @flags: fence flags
349  *
350  * Add a DMA fence packet to the ring to write
351  * the fence seq number and DMA trap packet to generate
352  * an interrupt if needed.
353  */
354 static void sdma_v7_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
355 				      unsigned flags)
356 {
357 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
358 	/* write the fence */
359 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
360 			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
361 	/* zero in first two bits */
362 	BUG_ON(addr & 0x3);
363 	amdgpu_ring_write(ring, lower_32_bits(addr));
364 	amdgpu_ring_write(ring, upper_32_bits(addr));
365 	amdgpu_ring_write(ring, lower_32_bits(seq));
366 
367 	/* optionally write high bits as well */
368 	if (write64bit) {
369 		addr += 4;
370 		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
371 				  SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
372 		/* zero in first two bits */
373 		BUG_ON(addr & 0x3);
374 		amdgpu_ring_write(ring, lower_32_bits(addr));
375 		amdgpu_ring_write(ring, upper_32_bits(addr));
376 		amdgpu_ring_write(ring, upper_32_bits(seq));
377 	}
378 
379 	if (flags & AMDGPU_FENCE_FLAG_INT) {
380 		/* generate an interrupt */
381 		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
382 		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
383 	}
384 }
385 
386 /**
387  * sdma_v7_1_inst_gfx_stop - stop the gfx async dma engines
388  *
389  * @adev: amdgpu_device pointer
390  * @inst_mask: mask of dma engine instances to be disabled
391  *
392  * Stop the gfx async dma ring buffers.
393  */
394 static void sdma_v7_1_inst_gfx_stop(struct amdgpu_device *adev,
395 				    uint32_t inst_mask)
396 {
397 	u32 rb_cntl, ib_cntl;
398 	int i;
399 
400 	for_each_inst(i, inst_mask) {
401 		rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
402 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 0);
403 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
404 		ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
405 		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 0);
406 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
407 	}
408 }
409 
410 /**
411  * sdma_v7_1_inst_rlc_stop - stop the compute async dma engines
412  *
413  * @adev: amdgpu_device pointer
414  * @inst_mask: mask of dma engine instances to be disabled
415  *
416  * Stop the compute async dma queues.
417  */
418 static void sdma_v7_1_inst_rlc_stop(struct amdgpu_device *adev,
419 				    uint32_t inst_mask)
420 {
421 	/* XXX todo */
422 }
423 
424 /**
425  * sdma_v7_1_inst_ctx_switch_enable - stop the async dma engines context switch
426  *
427  * @adev: amdgpu_device pointer
428  * @enable: enable/disable the DMA MEs context switch.
429  * @inst_mask: mask of dma engine instances to be enabled
430  *
431  * Halt or unhalt the async dma engines context switch.
432  */
433 static void sdma_v7_1_inst_ctx_switch_enable(struct amdgpu_device *adev,
434 					     bool enable, uint32_t inst_mask)
435 {
436 	int i;
437 
438 	for_each_inst(i, inst_mask) {
439 		WREG32_SOC15_IP(GC,
440 			sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_TIMEOUT), 0x80);
441 	}
442 }
443 
444 /**
445  * sdma_v7_1_inst_enable - stop the async dma engines
446  *
447  * @adev: amdgpu_device pointer
448  * @enable: enable/disable the DMA MEs.
449  * @inst_mask: mask of dma engine instances to be enabled
450  *
451  * Halt or unhalt the async dma engines.
452  */
453 static void sdma_v7_1_inst_enable(struct amdgpu_device *adev,
454 				  bool enable, uint32_t inst_mask)
455 {
456 	u32 mcu_cntl;
457 	int i;
458 
459 	if (!enable) {
460 		sdma_v7_1_inst_gfx_stop(adev, inst_mask);
461 		sdma_v7_1_inst_rlc_stop(adev, inst_mask);
462 	}
463 
464 	if (amdgpu_sriov_vf(adev))
465 		return;
466 
467 	for_each_inst(i, inst_mask) {
468 		mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
469 		mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_SDMA_MCU_CNTL, HALT, enable ? 0 : 1);
470 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), mcu_cntl);
471 	}
472 }
473 
474 /**
475  * sdma_v7_1_gfx_resume_instance - start/restart a certain sdma engine
476  *
477  * @adev: amdgpu_device pointer
478  * @i: instance
479  * @restore: used to restore wptr when restart
480  *
481  * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
482  * Return 0 for success.
483  */
484 static int sdma_v7_1_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
485 {
486 	struct amdgpu_ring *ring;
487 	u32 rb_cntl, ib_cntl;
488 	u32 rb_bufsz;
489 	u32 doorbell;
490 	u32 doorbell_offset;
491 	u32 temp;
492 	u64 wptr_gpu_addr;
493 	int r;
494 
495 	ring = &adev->sdma.instance[i].ring;
496 
497 	/* Set ring buffer size in dwords */
498 	rb_bufsz = order_base_2(ring->ring_size / 4);
499 	rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
500 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
501 #ifdef __BIG_ENDIAN
502 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
503 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL,
504 				RPTR_WRITEBACK_SWAP_ENABLE, 1);
505 #endif
506 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_PRIV, 1);
507 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
508 
509 	/* Initialize the ring buffer's read and write pointers */
510 	if (restore) {
511 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
512 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
513 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
514 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
515 	} else {
516 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), 0);
517 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), 0);
518 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), 0);
519 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), 0);
520 	}
521 	/* setup the wptr shadow polling */
522 	wptr_gpu_addr = ring->wptr_gpu_addr;
523 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_LO),
524 	       lower_32_bits(wptr_gpu_addr));
525 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_HI),
526 	       upper_32_bits(wptr_gpu_addr));
527 
528 	/* set the wb address whether it's enabled or not */
529 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_HI),
530 	       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
531 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_LO),
532 	       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
533 
534 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
535 	if (amdgpu_sriov_vf(adev))
536 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
537 	else
538 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);
539 
540 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);
541 
542 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
543 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);
544 
545 	if (!restore)
546 		ring->wptr = 0;
547 
548 	/* before programing wptr to a less value, need set minor_ptr_update first */
549 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 1);
550 
551 	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
552 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
553 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
554 	}
555 
556 	doorbell = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL));
557 	doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET));
558 
559 	if (ring->use_doorbell) {
560 		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);
561 		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET,
562 				OFFSET, ring->doorbell_index);
563 	} else {
564 		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 0);
565 	}
566 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL), doorbell);
567 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET), doorbell_offset);
568 
569 	if (i == 0)
570 		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
571 					      ring->doorbell_index,
572 					      adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);
573 
574 	if (amdgpu_sriov_vf(adev))
575 		sdma_v7_1_ring_set_wptr(ring);
576 
577 	/* set minor_ptr_update to 0 after wptr programed */
578 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 0);
579 
580 	/* Set up sdma hang watchdog */
581 	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL));
582 	/* 100ms per unit */
583 	temp = REG_SET_FIELD(temp, SDMA0_SDMA_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
584 			     max(adev->usec_timeout/100000, 1));
585 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL), temp);
586 
587 	/* Set up RESP_MODE to non-copy addresses */
588 	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL));
589 	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, RESP_MODE, 3);
590 	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, REDO_DELAY, 9);
591 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL), temp);
592 
593 	/* program default cache read and write policy */
594 	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE));
595 	/* clean read policy and write policy bits */
596 	temp &= 0xFF0FFF;
597 	temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
598 		 (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
599 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE), temp);
600 
601 	if (!amdgpu_sriov_vf(adev)) {
602 		/* unhalt engine */
603 		temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
604 		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, HALT, 0);
605 		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, RESET, 0);
606 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), temp);
607 	}
608 
609 	/* enable DMA RB */
610 	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 1);
611 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
612 
613 	ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
614 	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 1);
615 #ifdef __BIG_ENDIAN
616 	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
617 #endif
618 	/* enable DMA IBs */
619 	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
620 	ring->sched.ready = true;
621 
622 	if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
623 		sdma_v7_1_inst_ctx_switch_enable(adev, true, i);
624 		sdma_v7_1_inst_enable(adev, true, i);
625 	}
626 
627 	r = amdgpu_ring_test_helper(ring);
628 	if (r)
629 		ring->sched.ready = false;
630 
631 	return r;
632 }
633 
634 /**
635  * sdma_v7_1_inst_gfx_resume - setup and start the async dma engines
636  *
637  * @adev: amdgpu_device pointer
638  * inst_mask: mask of dma engine instances to be enabled
639  *
640  * Set up the gfx DMA ring buffers and enable them.
641  * Returns 0 for success, error for failure.
642  */
643 static int sdma_v7_1_inst_gfx_resume(struct amdgpu_device *adev,
644 				     uint32_t inst_mask)
645 {
646 	int i, r;
647 
648 	for_each_inst(i, inst_mask) {
649 		r = sdma_v7_1_gfx_resume_instance(adev, i, false);
650 		if (r)
651 			return r;
652 	}
653 
654 	return 0;
655 
656 }
657 
658 /**
659  * sdma_v7_1_inst_rlc_resume - setup and start the async dma engines
660  *
661  * @adev: amdgpu_device pointer
662  * @inst_mask: mask of dma engine instances to be enabled
663  *
664  * Set up the compute DMA queues and enable them.
665  * Returns 0 for success, error for failure.
666  */
667 static int sdma_v7_1_inst_rlc_resume(struct amdgpu_device *adev,
668 				     uint32_t inst_mask)
669 {
670 	return 0;
671 }
672 
673 static void sdma_v7_1_inst_free_ucode_buffer(struct amdgpu_device *adev,
674 					     uint32_t inst_mask)
675 {
676 	int i;
677 
678 	for_each_inst(i, inst_mask) {
679 		amdgpu_bo_free_kernel(&adev->sdma.instance[i].sdma_fw_obj,
680 				      &adev->sdma.instance[i].sdma_fw_gpu_addr,
681 				      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
682 	}
683 }
684 
685 /**
686  * sdma_v7_1_inst_load_microcode - load the sDMA ME ucode
687  *
688  * @adev: amdgpu_device pointer
689  * @inst_mask: mask of dma engine instances to be enabled
690  *
691  * Loads the sDMA0/1 ucode.
692  * Returns 0 for success, -EINVAL if the ucode is not available.
693  */
694 static int sdma_v7_1_inst_load_microcode(struct amdgpu_device *adev,
695 					 uint32_t inst_mask)
696 {
697 	const struct sdma_firmware_header_v3_0 *hdr;
698 	const __le32 *fw_data;
699 	u32 fw_size;
700 	uint32_t tmp, sdma_status, ic_op_cntl;
701 	int i, r, j;
702 
703 	/* halt the MEs */
704 	sdma_v7_1_inst_enable(adev, false, inst_mask);
705 
706 	if (!adev->sdma.instance[0].fw)
707 		return -EINVAL;
708 
709 	hdr = (const struct sdma_firmware_header_v3_0 *)
710 		adev->sdma.instance[0].fw->data;
711 	amdgpu_ucode_print_sdma_hdr(&hdr->header);
712 
713 	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
714 			le32_to_cpu(hdr->ucode_offset_bytes));
715 	fw_size = le32_to_cpu(hdr->ucode_size_bytes);
716 
717 	for_each_inst(i, inst_mask) {
718 		r = amdgpu_bo_create_reserved(adev, fw_size,
719 					      PAGE_SIZE,
720 					      AMDGPU_GEM_DOMAIN_VRAM,
721 					      &adev->sdma.instance[i].sdma_fw_obj,
722 					      &adev->sdma.instance[i].sdma_fw_gpu_addr,
723 					      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
724 		if (r) {
725 			dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
726 			return r;
727 		}
728 
729 		memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);
730 
731 		amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
732 		amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);
733 
734 		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL));
735 		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_CNTL, GPA, 0);
736 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL), tmp);
737 
738 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_LO),
739 			lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
740 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_HI),
741 			upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
742 
743 		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
744 		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_OP_CNTL, PRIME_ICACHE, 1);
745 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL), tmp);
746 
747 		/* Wait for sdma ucode init complete */
748 		for (j = 0; j < adev->usec_timeout; j++) {
749 			ic_op_cntl = RREG32_SOC15_IP(GC,
750 					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
751 			sdma_status = RREG32_SOC15_IP(GC,
752 					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
753 			if ((REG_GET_FIELD(ic_op_cntl, SDMA0_SDMA_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
754 			    (REG_GET_FIELD(sdma_status, SDMA0_SDMA_STATUS_REG, UCODE_INIT_DONE) == 1))
755 				break;
756 			udelay(1);
757 		}
758 
759 		if (j >= adev->usec_timeout) {
760 			dev_err(adev->dev, "failed to init sdma ucode\n");
761 			return -EINVAL;
762 		}
763 	}
764 
765 	return 0;
766 }
767 
768 static int sdma_v7_1_soft_reset(struct amdgpu_ip_block *ip_block)
769 {
770 	struct amdgpu_device *adev = ip_block->adev;
771 	uint32_t inst_mask;
772 	u32 tmp;
773 	int i;
774 
775 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
776 	sdma_v7_1_inst_gfx_stop(adev, inst_mask);
777 
778 	for_each_inst(i, inst_mask) {
779 		//tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE));
780 		//tmp |= SDMA0_SDMA_FREEZE__FREEZE_MASK;
781 		//WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE), tmp);
782 		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
783 		tmp |= SDMA0_SDMA_MCU_CNTL__HALT_MASK;
784 		tmp |= SDMA0_SDMA_MCU_CNTL__RESET_MASK;
785 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), tmp);
786 
787 		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_PREEMPT), 0);
788 
789 		udelay(100);
790 
791 		tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
792 		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
793 		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
794 
795 		udelay(100);
796 
797 		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
798 		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
799 
800 		udelay(100);
801 	}
802 
803 	return sdma_v7_1_inst_start(adev, inst_mask);
804 }
805 
806 static bool sdma_v7_1_check_soft_reset(struct amdgpu_ip_block *ip_block)
807 {
808 	struct amdgpu_device *adev = ip_block->adev;
809 	struct amdgpu_ring *ring;
810 	int i, r;
811 	long tmo = msecs_to_jiffies(1000);
812 
813 	for (i = 0; i < adev->sdma.num_instances; i++) {
814 		ring = &adev->sdma.instance[i].ring;
815 		r = amdgpu_ring_test_ib(ring, tmo);
816 		if (r)
817 			return true;
818 	}
819 
820 	return false;
821 }
822 
823 static int sdma_v7_1_reset_queue(struct amdgpu_ring *ring,
824 				 unsigned int vmid,
825 				 struct amdgpu_fence *timedout_fence)
826 {
827 	struct amdgpu_device *adev = ring->adev;
828 	int r;
829 
830 	if (ring->me >= adev->sdma.num_instances) {
831 		dev_err(adev->dev, "sdma instance not found\n");
832 		return -EINVAL;
833 	}
834 
835 	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
836 
837 	r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0);
838 	if (r)
839 		return r;
840 
841 	r = sdma_v7_1_gfx_resume_instance(adev, ring->me, true);
842 	if (r)
843 		return r;
844 
845 	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
846 }
847 
848 /**
849  * sdma_v7_1_inst_start - setup and start the async dma engines
850  *
851  * @adev: amdgpu_device pointer
852  * @inst_mask: mask of dma engine instances to be enabled
853  *
854  * Set up the DMA engines and enable them.
855  * Returns 0 for success, error for failure.
856  */
857 static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
858 				uint32_t inst_mask)
859 {
860 	int r = 0;
861 
862 	if (amdgpu_sriov_vf(adev)) {
863 		sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
864 		sdma_v7_1_inst_enable(adev, false, inst_mask);
865 
866 		/* set RB registers */
867 		r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
868 		return r;
869 	}
870 
871 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
872 		r = sdma_v7_1_inst_load_microcode(adev, inst_mask);
873 		if (r) {
874 			sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask);
875 			return r;
876 		}
877 
878 		if (amdgpu_emu_mode == 1)
879 			msleep(1000);
880 	}
881 
882 	/* unhalt the MEs */
883 	sdma_v7_1_inst_enable(adev, true, inst_mask);
884 	/* enable sdma ring preemption */
885 	sdma_v7_1_inst_ctx_switch_enable(adev, true, inst_mask);
886 
887 	/* start the gfx rings and rlc compute queues */
888 	r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
889 	if (r)
890 		return r;
891 	r = sdma_v7_1_inst_rlc_resume(adev, inst_mask);
892 
893 	return r;
894 }
895 
896 static int sdma_v7_1_mqd_init(struct amdgpu_device *adev, void *mqd,
897 			      struct amdgpu_mqd_prop *prop)
898 {
899 	struct v12_sdma_mqd *m = mqd;
900 	uint64_t wb_gpu_addr;
901 
902 	m->sdmax_rlcx_rb_cntl =
903 		order_base_2(prop->queue_size / 4) << SDMA0_SDMA_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
904 		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
905 		4 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
906 		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;
907 
908 	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
909 	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
910 
911 	wb_gpu_addr = prop->wptr_gpu_addr;
912 	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
913 	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
914 
915 	wb_gpu_addr = prop->rptr_gpu_addr;
916 	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
917 	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
918 
919 	m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, 0,
920 							regSDMA0_SDMA_QUEUE0_IB_CNTL));
921 
922 	m->sdmax_rlcx_doorbell_offset =
923 		prop->doorbell_index << SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;
924 
925 	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);
926 
927 	m->sdmax_rlcx_doorbell_log = 0;
928 	m->sdmax_rlcx_rb_aql_cntl = 0x4000;	//regSDMA0_SDMA_QUEUE0_RB_AQL_CNTL_DEFAULT;
929 	m->sdmax_rlcx_dummy_reg = 0xf;	//regSDMA0_SDMA_QUEUE0_DUMMY_REG_DEFAULT;
930 
931 	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
932 	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);
933 
934 	return 0;
935 }
936 
937 static void sdma_v7_1_set_mqd_funcs(struct amdgpu_device *adev)
938 {
939 	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
940 	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_1_mqd_init;
941 }
942 
943 /**
944  * sdma_v7_1_ring_test_ring - simple async dma engine test
945  *
946  * @ring: amdgpu_ring structure holding ring information
947  *
948  * Test the DMA engine by writing using it to write an
949  * value to memory.
950  * Returns 0 for success, error for failure.
951  */
952 static int sdma_v7_1_ring_test_ring(struct amdgpu_ring *ring)
953 {
954 	struct amdgpu_device *adev = ring->adev;
955 	unsigned i;
956 	unsigned index;
957 	int r;
958 	u32 tmp;
959 	u64 gpu_addr;
960 
961 	tmp = 0xCAFEDEAD;
962 
963 	r = amdgpu_device_wb_get(adev, &index);
964 	if (r) {
965 		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
966 		return r;
967 	}
968 
969 	gpu_addr = adev->wb.gpu_addr + (index * 4);
970 	adev->wb.wb[index] = cpu_to_le32(tmp);
971 
972 	r = amdgpu_ring_alloc(ring, 5);
973 	if (r) {
974 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
975 		amdgpu_device_wb_free(adev, index);
976 		return r;
977 	}
978 
979 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
980 			  SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
981 	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
982 	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
983 	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
984 	amdgpu_ring_write(ring, 0xDEADBEEF);
985 	amdgpu_ring_commit(ring);
986 
987 	for (i = 0; i < adev->usec_timeout; i++) {
988 		tmp = le32_to_cpu(adev->wb.wb[index]);
989 		if (tmp == 0xDEADBEEF)
990 			break;
991 		if (amdgpu_emu_mode == 1)
992 			msleep(1);
993 		else
994 			udelay(1);
995 	}
996 
997 	if (i >= adev->usec_timeout)
998 		r = -ETIMEDOUT;
999 
1000 	amdgpu_device_wb_free(adev, index);
1001 
1002 	return r;
1003 }
1004 
1005 /**
1006  * sdma_v7_1_ring_test_ib - test an IB on the DMA engine
1007  *
1008  * @ring: amdgpu_ring structure holding ring information
1009  * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
1010  *
1011  * Test a simple IB in the DMA ring.
1012  * Returns 0 on success, error on failure.
1013  */
1014 static int sdma_v7_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1015 {
1016 	struct amdgpu_device *adev = ring->adev;
1017 	struct amdgpu_ib ib;
1018 	struct dma_fence *f = NULL;
1019 	unsigned index;
1020 	long r;
1021 	u32 tmp = 0;
1022 	u64 gpu_addr;
1023 
1024 	tmp = 0xCAFEDEAD;
1025 	memset(&ib, 0, sizeof(ib));
1026 
1027 	r = amdgpu_device_wb_get(adev, &index);
1028 	if (r) {
1029 		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
1030 		return r;
1031 	}
1032 
1033 	gpu_addr = adev->wb.gpu_addr + (index * 4);
1034 	adev->wb.wb[index] = cpu_to_le32(tmp);
1035 
1036 	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
1037 	if (r) {
1038 		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
1039 		goto err0;
1040 	}
1041 
1042 	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
1043 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1044 	ib.ptr[1] = lower_32_bits(gpu_addr);
1045 	ib.ptr[2] = upper_32_bits(gpu_addr);
1046 	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
1047 	ib.ptr[4] = 0xDEADBEEF;
1048 	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1049 	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1050 	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
1051 	ib.length_dw = 8;
1052 
1053 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1054 	if (r)
1055 		goto err1;
1056 
1057 	r = dma_fence_wait_timeout(f, false, timeout);
1058 	if (r == 0) {
1059 		DRM_ERROR("amdgpu: IB test timed out\n");
1060 		r = -ETIMEDOUT;
1061 		goto err1;
1062 	} else if (r < 0) {
1063 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
1064 		goto err1;
1065 	}
1066 
1067 	tmp = le32_to_cpu(adev->wb.wb[index]);
1068 
1069 	if (tmp == 0xDEADBEEF)
1070 		r = 0;
1071 	else
1072 		r = -EINVAL;
1073 
1074 err1:
1075 	amdgpu_ib_free(&ib, NULL);
1076 	dma_fence_put(f);
1077 err0:
1078 	amdgpu_device_wb_free(adev, index);
1079 	return r;
1080 }
1081 
1082 
1083 /**
1084  * sdma_v7_1_vm_copy_pte - update PTEs by copying them from the GART
1085  *
1086  * @ib: indirect buffer to fill with commands
1087  * @pe: addr of the page entry
1088  * @src: src addr to copy from
1089  * @count: number of page entries to update
1090  *
1091  * Update PTEs by copying them from the GART using sDMA.
1092  */
1093 static void sdma_v7_1_vm_copy_pte(struct amdgpu_ib *ib,
1094 				  uint64_t pe, uint64_t src,
1095 				  unsigned count)
1096 {
1097 	unsigned bytes = count * 8;
1098 
1099 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
1100 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1101 
1102 	ib->ptr[ib->length_dw++] = bytes - 1;
1103 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1104 	ib->ptr[ib->length_dw++] = lower_32_bits(src);
1105 	ib->ptr[ib->length_dw++] = upper_32_bits(src);
1106 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1107 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1108 
1109 }
1110 
1111 /**
1112  * sdma_v7_1_vm_write_pte - update PTEs by writing them manually
1113  *
1114  * @ib: indirect buffer to fill with commands
1115  * @pe: addr of the page entry
1116  * @value: dst addr to write into pe
1117  * @count: number of page entries to update
1118  * @incr: increase next addr by incr bytes
1119  *
1120  * Update PTEs by writing them manually using sDMA.
1121  */
1122 static void sdma_v7_1_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1123 				   uint64_t value, unsigned count,
1124 				   uint32_t incr)
1125 {
1126 	unsigned ndw = count * 2;
1127 
1128 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
1129 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1130 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1131 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1132 	ib->ptr[ib->length_dw++] = ndw - 1;
1133 	for (; ndw > 0; ndw -= 2) {
1134 		ib->ptr[ib->length_dw++] = lower_32_bits(value);
1135 		ib->ptr[ib->length_dw++] = upper_32_bits(value);
1136 		value += incr;
1137 	}
1138 }
1139 
1140 /**
1141  * sdma_v7_1_vm_set_pte_pde - update the page tables using sDMA
1142  *
1143  * @ib: indirect buffer to fill with commands
1144  * @pe: addr of the page entry
1145  * @addr: dst addr to write into pe
1146  * @count: number of page entries to update
1147  * @incr: increase next addr by incr bytes
1148  * @flags: access flags
1149  *
1150  * Update the page tables using sDMA.
1151  */
1152 static void sdma_v7_1_vm_set_pte_pde(struct amdgpu_ib *ib,
1153 				     uint64_t pe,
1154 				     uint64_t addr, unsigned count,
1155 				     uint32_t incr, uint64_t flags)
1156 {
1157 	/* for physically contiguous pages (vram) */
1158 	u32 header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
1159 
1160 	if (amdgpu_mtype_local)
1161 		header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3);
1162 	else
1163 		header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2) |
1164 			   SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) |
1165 			   SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3));
1166 
1167 	ib->ptr[ib->length_dw++] = header;
1168 	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1169 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1170 	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
1171 	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
1172 	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1173 	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1174 	ib->ptr[ib->length_dw++] = incr; /* increment size */
1175 	ib->ptr[ib->length_dw++] = 0;
1176 	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
1177 }
1178 
1179 /**
1180  * sdma_v7_1_ring_pad_ib - pad the IB
1181  *
1182  * @ring: amdgpu ring pointer
1183  * @ib: indirect buffer to fill with padding
1184  *
1185  * Pad the IB with NOPs to a boundary multiple of 8.
1186  */
1187 static void sdma_v7_1_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1188 {
1189 	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1190 	u32 pad_count;
1191 	int i;
1192 
1193 	pad_count = (-ib->length_dw) & 0x7;
1194 	for (i = 0; i < pad_count; i++)
1195 		if (sdma && sdma->burst_nop && (i == 0))
1196 			ib->ptr[ib->length_dw++] =
1197 				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
1198 				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1199 		else
1200 			ib->ptr[ib->length_dw++] =
1201 				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
1202 }
1203 
1204 /**
1205  * sdma_v7_1_ring_emit_pipeline_sync - sync the pipeline
1206  *
1207  * @ring: amdgpu_ring pointer
1208  *
1209  * Make sure all previous operations are completed (CIK).
1210  */
1211 static void sdma_v7_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
1212 {
1213 	uint32_t seq = ring->fence_drv.sync_seq;
1214 	uint64_t addr = ring->fence_drv.gpu_addr;
1215 
1216 	/* wait for idle */
1217 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1218 			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1219 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
1220 			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
1221 	amdgpu_ring_write(ring, addr & 0xfffffffc);
1222 	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
1223 	amdgpu_ring_write(ring, seq); /* reference */
1224 	amdgpu_ring_write(ring, 0xffffffff); /* mask */
1225 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1226 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
1227 }
1228 
1229 /**
1230  * sdma_v7_1_ring_emit_vm_flush - vm flush using sDMA
1231  *
1232  * @ring: amdgpu_ring pointer
1233  * @vmid: vmid number to use
1234  * @pd_addr: address
1235  *
1236  * Update the page table base and flush the VM TLB
1237  * using sDMA.
1238  */
1239 static void sdma_v7_1_ring_emit_vm_flush(struct amdgpu_ring *ring,
1240 					 unsigned vmid, uint64_t pd_addr)
1241 {
1242 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
1243 }
1244 
1245 static void sdma_v7_1_ring_emit_wreg(struct amdgpu_ring *ring,
1246 				     uint32_t reg, uint32_t val)
1247 {
1248 	/* SRBM WRITE command will not support on sdma v7.
1249 	 * Use Register WRITE command instead, which OPCODE is same as SRBM WRITE
1250 	 */
1251 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE));
1252 	amdgpu_ring_write(ring, reg << 2);
1253 	amdgpu_ring_write(ring, val);
1254 }
1255 
1256 static void sdma_v7_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
1257 					 uint32_t val, uint32_t mask)
1258 {
1259 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
1260 			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
1261 			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
1262 	amdgpu_ring_write(ring, reg << 2);
1263 	amdgpu_ring_write(ring, 0);
1264 	amdgpu_ring_write(ring, val); /* reference */
1265 	amdgpu_ring_write(ring, mask); /* mask */
1266 	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
1267 			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
1268 }
1269 
1270 static void sdma_v7_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
1271 						   uint32_t reg0, uint32_t reg1,
1272 						   uint32_t ref, uint32_t mask)
1273 {
1274 	amdgpu_ring_emit_wreg(ring, reg0, ref);
1275 	/* wait for a cycle to reset vm_inv_eng*_ack */
1276 	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
1277 	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
1278 }
1279 
1280 static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block)
1281 {
1282 	struct amdgpu_device *adev = ip_block->adev;
1283 	int r;
1284 
1285 	r = amdgpu_sdma_init_microcode(adev, 0, true);
1286 	if (r) {
1287 		DRM_ERROR("Failed to init sdma firmware!\n");
1288 		return r;
1289 	}
1290 
1291 	sdma_v7_1_set_ring_funcs(adev);
1292 	sdma_v7_1_set_buffer_funcs(adev);
1293 	sdma_v7_1_set_vm_pte_funcs(adev);
1294 	sdma_v7_1_set_irq_funcs(adev);
1295 	sdma_v7_1_set_mqd_funcs(adev);
1296 
1297 	return 0;
1298 }
1299 
1300 static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
1301 {
1302 	struct amdgpu_ring *ring;
1303 	int r, i;
1304 	struct amdgpu_device *adev = ip_block->adev;
1305 	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
1306 	uint32_t *ptr;
1307 	u32 xcc_id;
1308 
1309 	/* SDMA trap event */
1310 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1311 			      GFX_11_0_0__SRCID__SDMA_TRAP,
1312 			      &adev->sdma.trap_irq);
1313 	if (r)
1314 		return r;
1315 
1316 	for (i = 0; i < adev->sdma.num_instances; i++) {
1317 		ring = &adev->sdma.instance[i].ring;
1318 		ring->ring_obj = NULL;
1319 		ring->use_doorbell = true;
1320 		ring->me = i;
1321 		xcc_id = adev->sdma.instance[i].xcc_id;
1322 
1323 		DRM_DEBUG("SDMA%d.%d use_doorbell being set to: [%s]\n",
1324 				xcc_id, i % adev->sdma.num_inst_per_xcc,
1325 				ring->use_doorbell?"true":"false");
1326 
1327 		ring->doorbell_index =
1328 			(adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
1329 
1330 		ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
1331 		sprintf(ring->name, "sdma%d.%d", xcc_id,
1332 				i % adev->sdma.num_inst_per_xcc);
1333 		r = amdgpu_ring_init(adev, ring, 1024,
1334 				     &adev->sdma.trap_irq,
1335 				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
1336 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
1337 		if (r)
1338 			return r;
1339 	}
1340 
1341 	adev->sdma.supported_reset =
1342 		amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
1343 	if (!amdgpu_sriov_vf(adev) &&
1344 	    !adev->debug_disable_gpu_ring_reset)
1345 		adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1346 
1347 	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
1348 	if (r)
1349 		return r;
1350 	/* Allocate memory for SDMA IP Dump buffer */
1351 	ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
1352 	if (ptr)
1353 		adev->sdma.ip_dump = ptr;
1354 	else
1355 		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
1356 
1357 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
1358 	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
1359 #endif
1360 
1361 	return r;
1362 }
1363 
1364 static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block)
1365 {
1366 	struct amdgpu_device *adev = ip_block->adev;
1367 	uint32_t inst_mask;
1368 	int i;
1369 
1370 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
1371 
1372 	for (i = 0; i < adev->sdma.num_instances; i++)
1373 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
1374 
1375 	amdgpu_sdma_sysfs_reset_mask_fini(adev);
1376 	amdgpu_sdma_destroy_inst_ctx(adev, true);
1377 
1378 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
1379 		sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask);
1380 
1381 	kfree(adev->sdma.ip_dump);
1382 
1383 	return 0;
1384 }
1385 
1386 static int sdma_v7_1_hw_init(struct amdgpu_ip_block *ip_block)
1387 {
1388 	struct amdgpu_device *adev = ip_block->adev;
1389 	uint32_t inst_mask;
1390 
1391 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
1392 
1393 	return sdma_v7_1_inst_start(adev, inst_mask);
1394 }
1395 
1396 static int sdma_v7_1_hw_fini(struct amdgpu_ip_block *ip_block)
1397 {
1398 	struct amdgpu_device *adev = ip_block->adev;
1399 	uint32_t inst_mask;
1400 
1401 	if (amdgpu_sriov_vf(adev))
1402 		return 0;
1403 
1404 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
1405 	sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
1406 	sdma_v7_1_inst_enable(adev, false, inst_mask);
1407 
1408 	return 0;
1409 }
1410 
/* Suspend callback: identical to the hardware teardown path. */
static int sdma_v7_1_suspend(struct amdgpu_ip_block *ip_block)
{
	int ret = sdma_v7_1_hw_fini(ip_block);

	return ret;
}
1415 
/* Resume callback: identical to the hardware init path. */
static int sdma_v7_1_resume(struct amdgpu_ip_block *ip_block)
{
	int ret = sdma_v7_1_hw_init(ip_block);

	return ret;
}
1420 
1421 static bool sdma_v7_1_is_idle(struct amdgpu_ip_block *ip_block)
1422 {
1423 	struct amdgpu_device *adev = ip_block->adev;
1424 	u32 i;
1425 
1426 	for (i = 0; i < adev->sdma.num_instances; i++) {
1427 		u32 tmp = RREG32(sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
1428 
1429 		if (!(tmp & SDMA0_SDMA_STATUS_REG__IDLE_MASK))
1430 			return false;
1431 	}
1432 
1433 	return true;
1434 }
1435 
1436 static int sdma_v7_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
1437 {
1438 	unsigned i, j;
1439 	u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
1440 	struct amdgpu_device *adev = ip_block->adev;
1441 
1442 	for (i = 0; i < adev->usec_timeout; i++) {
1443 		for (j = 0; j < adev->sdma.num_instances; j++) {
1444 			sdma[j] = RREG32(sdma_v7_1_get_reg_offset(adev,
1445 						j, regSDMA0_SDMA_STATUS_REG));
1446 			if (!(sdma[j] & SDMA0_SDMA_STATUS_REG__IDLE_MASK))
1447 				break;
1448 		}
1449 		if (j == adev->sdma.num_instances)
1450 			return 0;
1451 		udelay(1);
1452 	}
1453 	return -ETIMEDOUT;
1454 }
1455 
1456 static int sdma_v7_1_ring_preempt_ib(struct amdgpu_ring *ring)
1457 {
1458 	int i, r = 0;
1459 	struct amdgpu_device *adev = ring->adev;
1460 	u32 index = 0;
1461 	u64 sdma_gfx_preempt;
1462 
1463 	amdgpu_sdma_get_index_from_ring(ring, &index);
1464 	sdma_gfx_preempt =
1465 		sdma_v7_1_get_reg_offset(adev, index, regSDMA0_SDMA_QUEUE0_PREEMPT);
1466 
1467 	/* assert preemption condition */
1468 	amdgpu_ring_set_preempt_cond_exec(ring, false);
1469 
1470 	/* emit the trailing fence */
1471 	ring->trail_seq += 1;
1472 	r = amdgpu_ring_alloc(ring, 10);
1473 	if (r) {
1474 		DRM_ERROR("ring %d failed to be allocated \n", ring->idx);
1475 		return r;
1476 	}
1477 	sdma_v7_1_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
1478 				  ring->trail_seq, 0);
1479 	amdgpu_ring_commit(ring);
1480 
1481 	/* assert IB preemption */
1482 	WREG32(sdma_gfx_preempt, 1);
1483 
1484 	/* poll the trailing fence */
1485 	for (i = 0; i < adev->usec_timeout; i++) {
1486 		if (ring->trail_seq ==
1487 		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
1488 			break;
1489 		udelay(1);
1490 	}
1491 
1492 	if (i >= adev->usec_timeout) {
1493 		r = -EINVAL;
1494 		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
1495 	}
1496 
1497 	/* deassert IB preemption */
1498 	WREG32(sdma_gfx_preempt, 0);
1499 
1500 	/* deassert the preemption condition */
1501 	amdgpu_ring_set_preempt_cond_exec(ring, true);
1502 	return r;
1503 }
1504 
1505 static int sdma_v7_1_set_trap_irq_state(struct amdgpu_device *adev,
1506 					struct amdgpu_irq_src *source,
1507 					unsigned type,
1508 					enum amdgpu_interrupt_state state)
1509 {
1510 	u32 sdma_cntl;
1511 
1512 	u32 reg_offset = sdma_v7_1_get_reg_offset(adev, type, regSDMA0_SDMA_CNTL);
1513 
1514 	sdma_cntl = RREG32(reg_offset);
1515 	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_SDMA_CNTL, TRAP_ENABLE,
1516 		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
1517 	WREG32(reg_offset, sdma_cntl);
1518 
1519 	return 0;
1520 }
1521 
1522 static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev,
1523 				      struct amdgpu_irq_src *source,
1524 				      struct amdgpu_iv_entry *entry)
1525 {
1526 	int instances, queue, xcc_id = 0;
1527 	uint32_t mes_queue_id = entry->src_data[0];
1528 
1529 	DRM_DEBUG("IH: SDMA trap\n");
1530 
1531 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
1532 		struct amdgpu_mes_queue *queue;
1533 
1534 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
1535 
1536 		spin_lock(&adev->mes.queue_id_lock);
1537 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
1538 		if (queue) {
1539 			DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
1540 			amdgpu_fence_process(queue->ring);
1541 		}
1542 		spin_unlock(&adev->mes.queue_id_lock);
1543 		return 0;
1544 	}
1545 
1546 	queue = entry->ring_id & 0xf;
1547 	if (adev->gfx.funcs && adev->gfx.funcs->ih_node_to_logical_xcc)
1548 		xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, entry->node_id);
1549 	else
1550 		dev_warn(adev->dev, "IH: SDMA may get wrong xcc id as gfx function not available\n");
1551 	instances = ((entry->ring_id & 0xf0) >> 4) +
1552 		xcc_id * adev->sdma.num_inst_per_xcc;
1553 	if (instances > adev->sdma.num_instances - 1) {
1554 		DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
1555 		return -EINVAL;
1556 	}
1557 
1558 	switch (entry->client_id) {
1559 	case SOC21_IH_CLIENTID_GFX:
1560 		switch (queue) {
1561 		case 0:
1562 			amdgpu_fence_process(&adev->sdma.instance[instances].ring);
1563 			break;
1564 		default:
1565 			break;
1566 		}
1567 		break;
1568 	}
1569 	return 0;
1570 }
1571 
/* Illegal instruction interrupt handler: takes no action and returns 0. */
static int
sdma_v7_1_process_illegal_inst_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	return 0;
}
1578 
1579 static int sdma_v7_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
1580 					   enum amd_clockgating_state state)
1581 {
1582 	return 0;
1583 }
1584 
1585 static int sdma_v7_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
1586 					  enum amd_powergating_state state)
1587 {
1588 	return 0;
1589 }
1590 
1591 static void sdma_v7_1_get_clockgating_state(struct amdgpu_ip_block *ip_block,
1592 					    u64 *flags)
1593 {
1594 }
1595 
1596 static void sdma_v7_1_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
1597 {
1598 	struct amdgpu_device *adev = ip_block->adev;
1599 	int i, j;
1600 	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
1601 	uint32_t instance_offset;
1602 
1603 	if (!adev->sdma.ip_dump)
1604 		return;
1605 
1606 	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
1607 	for (i = 0; i < adev->sdma.num_instances; i++) {
1608 		instance_offset = i * reg_count;
1609 		drm_printf(p, "\nInstance:%d\n", i);
1610 
1611 		for (j = 0; j < reg_count; j++)
1612 			drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_1[j].reg_name,
1613 				   adev->sdma.ip_dump[instance_offset + j]);
1614 	}
1615 }
1616 
1617 static void sdma_v7_1_dump_ip_state(struct amdgpu_ip_block *ip_block)
1618 {
1619 	struct amdgpu_device *adev = ip_block->adev;
1620 	int i, j;
1621 	uint32_t instance_offset;
1622 	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
1623 
1624 	if (!adev->sdma.ip_dump)
1625 		return;
1626 
1627 	amdgpu_gfx_off_ctrl(adev, false);
1628 	for (i = 0; i < adev->sdma.num_instances; i++) {
1629 		instance_offset = i * reg_count;
1630 		for (j = 0; j < reg_count; j++)
1631 			adev->sdma.ip_dump[instance_offset + j] =
1632 				RREG32(sdma_v7_1_get_reg_offset(adev, i,
1633 				       sdma_reg_list_7_1[j].reg_offset));
1634 	}
1635 	amdgpu_gfx_off_ctrl(adev, true);
1636 }
1637 
1638 const struct amd_ip_funcs sdma_v7_1_ip_funcs = {
1639 	.name = "sdma_v7_1",
1640 	.early_init = sdma_v7_1_early_init,
1641 	.late_init = NULL,
1642 	.sw_init = sdma_v7_1_sw_init,
1643 	.sw_fini = sdma_v7_1_sw_fini,
1644 	.hw_init = sdma_v7_1_hw_init,
1645 	.hw_fini = sdma_v7_1_hw_fini,
1646 	.suspend = sdma_v7_1_suspend,
1647 	.resume = sdma_v7_1_resume,
1648 	.is_idle = sdma_v7_1_is_idle,
1649 	.wait_for_idle = sdma_v7_1_wait_for_idle,
1650 	.soft_reset = sdma_v7_1_soft_reset,
1651 	.check_soft_reset = sdma_v7_1_check_soft_reset,
1652 	.set_clockgating_state = sdma_v7_1_set_clockgating_state,
1653 	.set_powergating_state = sdma_v7_1_set_powergating_state,
1654 	.get_clockgating_state = sdma_v7_1_get_clockgating_state,
1655 	.dump_ip_state = sdma_v7_1_dump_ip_state,
1656 	.print_ip_state = sdma_v7_1_print_ip_state,
1657 };
1658 
1659 static const struct amdgpu_ring_funcs sdma_v7_1_ring_funcs = {
1660 	.type = AMDGPU_RING_TYPE_SDMA,
1661 	.align_mask = 0xf,
1662 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
1663 	.support_64bit_ptrs = true,
1664 	.secure_submission_supported = true,
1665 	.get_rptr = sdma_v7_1_ring_get_rptr,
1666 	.get_wptr = sdma_v7_1_ring_get_wptr,
1667 	.set_wptr = sdma_v7_1_ring_set_wptr,
1668 	.emit_frame_size =
1669 		5 + /* sdma_v7_1_ring_init_cond_exec */
1670 		6 + /* sdma_v7_1_ring_emit_hdp_flush */
1671 		6 + /* sdma_v7_1_ring_emit_pipeline_sync */
1672 		/* sdma_v7_1_ring_emit_vm_flush */
1673 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1674 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
1675 		10 + 10 + 10, /* sdma_v7_1_ring_emit_fence x3 for user fence, vm fence */
1676 	.emit_ib_size = 5 + 7 + 6, /* sdma_v7_1_ring_emit_ib */
1677 	.emit_ib = sdma_v7_1_ring_emit_ib,
1678 	.emit_mem_sync = sdma_v7_1_ring_emit_mem_sync,
1679 	.emit_fence = sdma_v7_1_ring_emit_fence,
1680 	.emit_pipeline_sync = sdma_v7_1_ring_emit_pipeline_sync,
1681 	.emit_vm_flush = sdma_v7_1_ring_emit_vm_flush,
1682 	.emit_hdp_flush = sdma_v7_1_ring_emit_hdp_flush,
1683 	.test_ring = sdma_v7_1_ring_test_ring,
1684 	.test_ib = sdma_v7_1_ring_test_ib,
1685 	.insert_nop = sdma_v7_1_ring_insert_nop,
1686 	.pad_ib = sdma_v7_1_ring_pad_ib,
1687 	.emit_wreg = sdma_v7_1_ring_emit_wreg,
1688 	.emit_reg_wait = sdma_v7_1_ring_emit_reg_wait,
1689 	.emit_reg_write_reg_wait = sdma_v7_1_ring_emit_reg_write_reg_wait,
1690 	.init_cond_exec = sdma_v7_1_ring_init_cond_exec,
1691 	.preempt_ib = sdma_v7_1_ring_preempt_ib,
1692 	.reset = sdma_v7_1_reset_queue,
1693 };
1694 
1695 static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev)
1696 {
1697 	int i, dev_inst;
1698 
1699 	for (i = 0; i < adev->sdma.num_instances; i++) {
1700 		adev->sdma.instance[i].ring.funcs = &sdma_v7_1_ring_funcs;
1701 		adev->sdma.instance[i].ring.me = i;
1702 
1703 		dev_inst = GET_INST(SDMA0, i);
1704 		/* XCC to which SDMA belongs depends on physical instance */
1705 		adev->sdma.instance[i].xcc_id =
1706 			dev_inst / adev->sdma.num_inst_per_xcc;
1707 	}
1708 }
1709 
1710 static const struct amdgpu_irq_src_funcs sdma_v7_1_trap_irq_funcs = {
1711 	.set = sdma_v7_1_set_trap_irq_state,
1712 	.process = sdma_v7_1_process_trap_irq,
1713 };
1714 
1715 static const struct amdgpu_irq_src_funcs sdma_v7_1_illegal_inst_irq_funcs = {
1716 	.process = sdma_v7_1_process_illegal_inst_irq,
1717 };
1718 
1719 static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev)
1720 {
1721 	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
1722 					adev->sdma.num_instances;
1723 	adev->sdma.trap_irq.funcs = &sdma_v7_1_trap_irq_funcs;
1724 	adev->sdma.illegal_inst_irq.funcs = &sdma_v7_1_illegal_inst_irq_funcs;
1725 }
1726 
1727 /**
1728  * sdma_v7_1_emit_copy_buffer - copy buffer using the sDMA engine
1729  *
1730  * @ib: indirect buffer to fill with commands
1731  * @src_offset: src GPU address
1732  * @dst_offset: dst GPU address
1733  * @byte_count: number of bytes to xfer
1734  * @copy_flags: copy flags for the buffers
1735  *
1736  * Copy GPU buffers using the DMA engine.
1737  * Used by the amdgpu ttm implementation to move pages if
1738  * registered as the asic copy callback.
1739  */
1740 static void sdma_v7_1_emit_copy_buffer(struct amdgpu_ib *ib,
1741 				       uint64_t src_offset,
1742 				       uint64_t dst_offset,
1743 				       uint32_t byte_count,
1744 				       uint32_t copy_flags)
1745 {
1746 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
1747 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
1748 		SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
1749 
1750 	ib->ptr[ib->length_dw++] = byte_count - 1;
1751 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1752 	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1753 	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1754 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1755 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1756 }
1757 
1758 /**
1759  * sdma_v7_1_emit_fill_buffer - fill buffer using the sDMA engine
1760  *
1761  * @ib: indirect buffer to fill
1762  * @src_data: value to write to buffer
1763  * @dst_offset: dst GPU address
1764  * @byte_count: number of bytes to xfer
1765  *
1766  * Fill GPU buffers using the DMA engine.
1767  */
1768 static void sdma_v7_1_emit_fill_buffer(struct amdgpu_ib *ib,
1769 				       uint32_t src_data,
1770 				       uint64_t dst_offset,
1771 				       uint32_t byte_count)
1772 {
1773 	ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL);
1774 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1775 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1776 	ib->ptr[ib->length_dw++] = src_data;
1777 	ib->ptr[ib->length_dw++] = byte_count - 1;
1778 }
1779 
/*
 * Buffer copy/fill callbacks and their size parameters.
 * sdma_v7_1_set_buffer_funcs() installs this table on adev->mman.
 */
static const struct amdgpu_buffer_funcs sdma_v7_1_buffer_funcs = {
	.copy_max_bytes = 0x400000,
	/*
	 * NOTE(review): sdma_v7_1_emit_copy_buffer() writes 7 dwords, so 8
	 * reserves one more than it emits - confirm this is intentional.
	 */
	.copy_num_dw = 8,
	.emit_copy_buffer = sdma_v7_1_emit_copy_buffer,
	.fill_max_bytes = 0x400000,
	/* matches the 5 dwords written by sdma_v7_1_emit_fill_buffer() */
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v7_1_emit_fill_buffer,
};
1788 
1789 static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev)
1790 {
1791 	adev->mman.buffer_funcs = &sdma_v7_1_buffer_funcs;
1792 	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1793 }
1794 
/*
 * Page-table-entry update callbacks implemented by this SDMA version.
 * sdma_v7_1_set_vm_pte_funcs() installs this table on adev->vm_manager.
 */
static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = {
	.copy_pte_num_dw = 8,
	.copy_pte = sdma_v7_1_vm_copy_pte,
	.write_pte = sdma_v7_1_vm_write_pte,
	.set_pte_pde = sdma_v7_1_vm_set_pte_pde,
};
1801 
1802 static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev)
1803 {
1804 	unsigned i;
1805 
1806 	adev->vm_manager.vm_pte_funcs = &sdma_v7_1_vm_pte_funcs;
1807 	for (i = 0; i < adev->sdma.num_instances; i++) {
1808 		adev->vm_manager.vm_pte_scheds[i] =
1809 			&adev->sdma.instance[i].ring.sched;
1810 	}
1811 	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
1812 }
1813 
/*
 * IP block descriptor for SDMA version 7.1.0.
 * Has external linkage so it can be referenced from outside this file.
 */
const struct amdgpu_ip_block_version sdma_v7_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 7,
	.minor = 1,
	.rev = 0,
	.funcs = &sdma_v7_1_ip_funcs,
};
1821 
/*
 * xcp resume callback.
 *
 * @handle: opaque pointer, interpreted as the struct amdgpu_device
 * @inst_mask: instance mask forwarded to sdma_v7_1_inst_start()
 *
 * Returns the result of sdma_v7_1_inst_start() unchanged.
 */
static int sdma_v7_1_xcp_resume(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = handle;

	return sdma_v7_1_inst_start(adev, inst_mask);
}
1831 
/*
 * xcp suspend callback.
 *
 * @handle: opaque pointer, interpreted as the struct amdgpu_device
 * @inst_mask: instance mask forwarded to both helpers below
 *
 * Calls sdma_v7_1_inst_ctx_switch_enable() and then
 * sdma_v7_1_inst_enable(), each with enable == false.  Always returns 0.
 */
static int sdma_v7_1_xcp_suspend(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = handle;

	/* keep this order: context switch first, then the instances */
	sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
	sdma_v7_1_inst_enable(adev, false, inst_mask);

	return 0;
}
1841 
1842 struct amdgpu_xcp_ip_funcs sdma_v7_1_xcp_funcs = {
1843 	.suspend = &sdma_v7_1_xcp_suspend,
1844 	.resume = &sdma_v7_1_xcp_resume
1845 };
1846