xref: /linux/drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c (revision 4ed5116aacf6126ef9c7bc8cd7367ed6797e5c8f)
1 /*
2  * Copyright 2025 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 
29 #include "amdgpu.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_trace.h"
32 
33 #include "gc/gc_12_1_0_offset.h"
34 #include "gc/gc_12_1_0_sh_mask.h"
35 #include "hdp/hdp_6_0_0_offset.h"
36 #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h"
37 
38 #include "soc15_common.h"
39 #include "soc15.h"
40 #include "sdma_v7_1_0_pkt_open.h"
41 #include "nbio_v4_3.h"
42 #include "sdma_common.h"
43 #include "sdma_v7_1.h"
44 #include "v12_structs.h"
45 #include "mes_userqueue.h"
46 
47 MODULE_FIRMWARE("amdgpu/sdma_7_1_0.bin");
48 
49 #define SDMA1_REG_OFFSET 0x600
50 #define SDMA0_SDMA_IDX_0_END 0x450
51 #define SDMA1_HYP_DEC_REG_OFFSET 0x30
52 
/*
 * GC-block SDMA registers to capture for state dumps.
 * NOTE(review): presumably consumed by the amdgpu IP register-dump path
 * (devcoredump/debugfs); the consumer is not visible in this chunk.
 */
static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_1[] = {
	/* engine status / firmware revision */
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS1_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS2_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS3_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS4_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS5_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS6_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UCODE_REV),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH),
	/* UTCL1 (address translation) status and XNACK info */
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK1),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK1),
	/* queue 0 ring/IB state */
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE_STATUS0),
	/* queue 1 ring/IB state */
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_DUMMY_REG),
	/* queue 2 ring/IB state */
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_DUMMY_REG),
	/* interrupt / VM / global status */
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_INT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_VM_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_CHICKEN_BITS),
};
110 
111 static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev);
112 static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev);
113 static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev);
114 static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev);
115 static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
116 				uint32_t inst_mask);
117 
118 static u32 sdma_v7_1_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
119 {
120 	u32 base;
121 	u32 dev_inst = GET_INST(SDMA0, instance);
122 	int xcc_id = adev->sdma.instance[instance].xcc_id;
123 	int xcc_inst = dev_inst % adev->sdma.num_inst_per_xcc;
124 
125 	if (internal_offset >= SDMA0_SDMA_IDX_0_END) {
126 		base = adev->reg_offset[GC_HWIP][xcc_id][1];
127 		if (xcc_inst != 0)
128 			internal_offset += SDMA1_HYP_DEC_REG_OFFSET * xcc_inst;
129 	} else {
130 		base = adev->reg_offset[GC_HWIP][xcc_id][0];
131 		if (xcc_inst != 0)
132 			internal_offset += SDMA1_REG_OFFSET * xcc_inst;
133 	}
134 
135 	return base + internal_offset;
136 }
137 
138 static unsigned sdma_v7_1_ring_init_cond_exec(struct amdgpu_ring *ring,
139 					      uint64_t addr)
140 {
141 	unsigned ret;
142 
143 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
144 	amdgpu_ring_write(ring, lower_32_bits(addr));
145 	amdgpu_ring_write(ring, upper_32_bits(addr));
146 	amdgpu_ring_write(ring, 1);
147 	/* this is the offset we need patch later */
148 	ret = ring->wptr & ring->buf_mask;
149 	/* insert dummy here and patch it later */
150 	amdgpu_ring_write(ring, 0);
151 
152 	return ret;
153 }
154 
155 /**
156  * sdma_v7_1_ring_get_rptr - get the current read pointer
157  *
158  * @ring: amdgpu ring pointer
159  *
160  * Get the current rptr from the hardware.
161  */
162 static uint64_t sdma_v7_1_ring_get_rptr(struct amdgpu_ring *ring)
163 {
164 	u64 *rptr;
165 
166 	/* XXX check if swapping is necessary on BE */
167 	rptr = (u64 *)ring->rptr_cpu_addr;
168 
169 	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
170 	return ((*rptr) >> 2);
171 }
172 
173 /**
174  * sdma_v7_1_ring_get_wptr - get the current write pointer
175  *
176  * @ring: amdgpu ring pointer
177  *
178  * Get the current wptr from the hardware.
179  */
180 static uint64_t sdma_v7_1_ring_get_wptr(struct amdgpu_ring *ring)
181 {
182 	u64 wptr = 0;
183 
184 	if (ring->use_doorbell) {
185 		/* XXX check if swapping is necessary on BE */
186 		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
187 		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
188 	}
189 
190 	return wptr >> 2;
191 }
192 
193 /**
194  * sdma_v7_1_ring_set_wptr - commit the write pointer
195  *
196  * @ring: amdgpu ring pointer
197  *
198  * Write the wptr back to the hardware.
199  */
static void sdma_v7_1_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	DRM_DEBUG("Setting write pointer\n");

	if (ring->use_doorbell) {
		DRM_DEBUG("Using doorbell -- "
			  "wptr_offs == 0x%08x "
			  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
			  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
			  ring->wptr_offs,
			  lower_32_bits(ring->wptr << 2),
			  upper_32_bits(ring->wptr << 2));
		/* XXX check if swapping is necessary on BE */
		/* update the shadow wptr (in bytes) in writeback memory first,
		 * then ring the doorbell to notify the engine
		 */
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr << 2);
		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
			  ring->doorbell_index, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		/* no doorbell: program the queue wptr registers directly */
		DRM_DEBUG("Not using doorbell -- "
			  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
			  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
			  ring->me,
			  lower_32_bits(ring->wptr << 2),
			  ring->me,
			  upper_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
							     ring->me,
							     regSDMA0_SDMA_QUEUE0_RB_WPTR),
				lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
							     ring->me,
							     regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
				upper_32_bits(ring->wptr << 2));
	}
}
238 
239 static void sdma_v7_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
240 {
241 	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
242 	int i;
243 
244 	for (i = 0; i < count; i++)
245 		if (sdma && sdma->burst_nop && (i == 0))
246 			amdgpu_ring_write(ring, ring->funcs->nop |
247 				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
248 		else
249 			amdgpu_ring_write(ring, ring->funcs->nop);
250 }
251 
252 /**
253  * sdma_v7_1_ring_emit_ib - Schedule an IB on the DMA engine
254  *
255  * @ring: amdgpu ring pointer
256  * @job: job to retrieve vmid from
257  * @ib: IB object to schedule
258  * @flags: unused
259  *
260  * Schedule an IB in the DMA ring.
261  */
static void sdma_v7_1_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_job *job,
				   struct amdgpu_ib *ib,
				   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	/* CSA (context save area) address for this vmid,
	 * from amdgpu_sdma_get_csa_mc_addr()
	 */
	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

	/* An IB packet must end on a 8 DW boundary--the next dword
	 * must be on a 8-dword boundary. Our IB packet below is 6
	 * dwords long, thus add x number of NOPs, such that, in
	 * modular arithmetic,
	 * wptr + 6 + x = 8k, k >= 0, which in C is,
	 * (wptr + 6 + x) % 8 = 0.
	 * The expression below, is a solution of x.
	 */
	sdma_v7_1_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}
289 
290 /**
291  * sdma_v7_1_ring_emit_mem_sync - flush the IB by graphics cache rinse
292  *
293  * @ring: amdgpu ring pointer
294  *
295  * flush the IB by graphics cache rinse.
296  */
static void sdma_v7_1_ring_emit_mem_sync(struct amdgpu_ring *ring)
{
	/* invalidate GL2/GLM/GL1/GLV/GLK/GLI and write back GL2 */
	uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
		SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
		SDMA_GCR_GLI_INV(1);

	/* flush entire cache L0/L1/L2, this can be optimized by performance requirement */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_56_32(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_0(gcr_cntl) |
			  SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_15_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_16(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD5_LIMIT_VA_56_48(0) |
			  SDMA_PKT_GCR_REQ_PAYLOAD5_VMID(0));
}
313 
314 
315 /**
316  * sdma_v7_1_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
317  *
318  * @ring: amdgpu ring pointer
319  *
320  * Emit an hdp flush packet on the requested DMA ring.
321  */
static void sdma_v7_1_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 ref_and_mask = 0;
	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;

	/* shift the sdma0 bit by the instance's position within its XCC */
	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0
				<< (ring->me % adev->sdma.num_inst_per_xcc);

	/* POLL_REGMEM with the HDP_FLUSH bit: poll the NBIO flush-done
	 * register until (value & mask) == reference
	 */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
	amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
	amdgpu_ring_write(ring, ref_and_mask); /* reference */
	amdgpu_ring_write(ring, ref_and_mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
341 
342 /**
343  * sdma_v7_1_ring_emit_fence - emit a fence on the DMA ring
344  *
345  * @ring: amdgpu ring pointer
346  * @addr: address
347  * @seq: fence seq number
348  * @flags: fence flags
349  *
350  * Add a DMA fence packet to the ring to write
351  * the fence seq number and DMA trap packet to generate
352  * an interrupt if needed.
353  */
static void sdma_v7_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Ucached(UC) */
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		/* second fence packet writes the upper 32 bits of seq
		 * to the next dword at addr + 4
		 */
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
				  SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* generate an interrupt */
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
	}
}
385 
386 /**
387  * sdma_v7_1_inst_gfx_stop - stop the gfx async dma engines
388  *
389  * @adev: amdgpu_device pointer
390  * @inst_mask: mask of dma engine instances to be disabled
391  *
392  * Stop the gfx async dma ring buffers.
393  */
static void sdma_v7_1_inst_gfx_stop(struct amdgpu_device *adev,
				    uint32_t inst_mask)
{
	u32 rb_cntl, ib_cntl;
	int i;

	for_each_inst(i, inst_mask) {
		/* disable ring buffer fetch, then IB fetch, for queue 0 */
		rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
		ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
	}
}
409 
410 /**
411  * sdma_v7_1_inst_rlc_stop - stop the compute async dma engines
412  *
413  * @adev: amdgpu_device pointer
414  * @inst_mask: mask of dma engine instances to be disabled
415  *
416  * Stop the compute async dma queues.
417  */
static void sdma_v7_1_inst_rlc_stop(struct amdgpu_device *adev,
				    uint32_t inst_mask)
{
	/* XXX todo - compute (RLC) queue teardown is not implemented yet */
}
423 
424 /**
425  * sdma_v7_1_inst_ctx_switch_enable - stop the async dma engines context switch
426  *
427  * @adev: amdgpu_device pointer
428  * @enable: enable/disable the DMA MEs context switch.
429  * @inst_mask: mask of dma engine instances to be enabled
430  *
431  * Halt or unhalt the async dma engines context switch.
432  */
static void sdma_v7_1_inst_ctx_switch_enable(struct amdgpu_device *adev,
					     bool enable, uint32_t inst_mask)
{
	int i;

	/* NOTE(review): @enable is currently unused - this only programs the
	 * UTCL1 timeout unconditionally; confirm whether context-switch
	 * enable/disable still needs to be wired up here.
	 */
	for_each_inst(i, inst_mask) {
		WREG32_SOC15_IP(GC,
			sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_TIMEOUT), 0x80);
	}
}
443 
444 /**
445  * sdma_v7_1_inst_enable - stop the async dma engines
446  *
447  * @adev: amdgpu_device pointer
448  * @enable: enable/disable the DMA MEs.
449  * @inst_mask: mask of dma engine instances to be enabled
450  *
451  * Halt or unhalt the async dma engines.
452  */
static void sdma_v7_1_inst_enable(struct amdgpu_device *adev,
				  bool enable, uint32_t inst_mask)
{
	u32 mcu_cntl;
	int i;

	/* when disabling, stop the queues before halting the MCU */
	if (!enable) {
		sdma_v7_1_inst_gfx_stop(adev, inst_mask);
		sdma_v7_1_inst_rlc_stop(adev, inst_mask);
	}

	/* VFs skip MCU_CNTL writes - presumably host-owned under SR-IOV */
	if (amdgpu_sriov_vf(adev))
		return;

	for_each_inst(i, inst_mask) {
		mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_SDMA_MCU_CNTL, HALT, enable ? 0 : 1);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), mcu_cntl);
	}
}
473 
474 /**
475  * sdma_v7_1_gfx_resume_instance - start/restart a certain sdma engine
476  *
477  * @adev: amdgpu_device pointer
478  * @i: instance
479  * @restore: used to restore wptr when restart
480  *
481  * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr.
482  * Return 0 for success.
483  */
static int sdma_v7_1_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 doorbell;
	u32 doorbell_offset;
	u32 temp;
	u64 wptr_gpu_addr;
	int r;

	ring = &adev->sdma.instance[i].ring;

	/* Set ring buffer size in dwords */
	rb_bufsz = order_base_2(ring->ring_size / 4);
	rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL,
				RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_PRIV, 1);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);

	/* Initialize the ring buffer's read and write pointers.
	 * On restore, both rptr and wptr are re-seeded from the saved
	 * wptr (in byte units, hence << 2) so the queue resumes where
	 * it left off; otherwise start from 0.
	 */
	if (restore) {
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
	} else {
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), 0);
	}
	/* setup the wptr shadow polling */
	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_LO),
	       lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_HI),
	       upper_32_bits(wptr_gpu_addr));

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_HI),
	       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_LO),
	       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
	/* VFs use wptr polling (no direct register write path); bare metal does not */
	if (amdgpu_sriov_vf(adev))
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
	else
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);

	/* ring base address, 256-byte aligned (low bits dropped by >> 8) */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);

	if (!restore)
		ring->wptr = 0;

	/* before programming wptr to a smaller value, need to set minor_ptr_update first */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 1);

	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
	}

	doorbell = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL));
	doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET));

	if (ring->use_doorbell) {
		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);
		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET,
				OFFSET, ring->doorbell_index);
	} else {
		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 0);
	}
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL), doorbell);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET), doorbell_offset);

	/* the doorbell aperture range for all instances is programmed once,
	 * when instance 0 comes up
	 */
	if (i == 0)
		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
					      ring->doorbell_index,
					      adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);

	if (amdgpu_sriov_vf(adev))
		sdma_v7_1_ring_set_wptr(ring);

	/* set minor_ptr_update to 0 after wptr programmed */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 0);

	/* Set up sdma hang watchdog */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL));
	/* 100ms per unit */
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
			     max(adev->usec_timeout/100000, 1));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL), temp);

	/* Set up RESP_MODE to non-copy addresses */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL));
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, RESP_MODE, 3);
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, REDO_DELAY, 9);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL), temp);

	/* program default cache read and write policy */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE));
	/* clean read policy and write policy bits */
	temp &= 0xFF0FFF;
	temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
		 (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE), temp);

	if (!amdgpu_sriov_vf(adev)) {
		/* unhalt engine */
		temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, HALT, 0);
		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, RESET, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), temp);
	}

	/* enable DMA RB */
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 1);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);

	ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
	/* enable DMA IBs */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
	ring->sched.ready = true;

	if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below two lines */
		/* NOTE(review): both helpers take an instance *mask* but are
		 * passed the instance *index* i here (0 for instance 0 selects
		 * no instance) - should this be BIT(i)? confirm.
		 */
		sdma_v7_1_inst_ctx_switch_enable(adev, true, i);
		sdma_v7_1_inst_enable(adev, true, i);
	}

	r = amdgpu_ring_test_helper(ring);
	if (r)
		ring->sched.ready = false;

	return r;
}
633 
634 /**
635  * sdma_v7_1_inst_gfx_resume - setup and start the async dma engines
636  *
637  * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
639  *
640  * Set up the gfx DMA ring buffers and enable them.
641  * Returns 0 for success, error for failure.
642  */
643 static int sdma_v7_1_inst_gfx_resume(struct amdgpu_device *adev,
644 				     uint32_t inst_mask)
645 {
646 	int i, r;
647 
648 	for_each_inst(i, inst_mask) {
649 		r = sdma_v7_1_gfx_resume_instance(adev, i, false);
650 		if (r)
651 			return r;
652 	}
653 
654 	return 0;
655 
656 }
657 
658 /**
659  * sdma_v7_1_inst_rlc_resume - setup and start the async dma engines
660  *
661  * @adev: amdgpu_device pointer
662  * @inst_mask: mask of dma engine instances to be enabled
663  *
664  * Set up the compute DMA queues and enable them.
665  * Returns 0 for success, error for failure.
666  */
static int sdma_v7_1_inst_rlc_resume(struct amdgpu_device *adev,
				     uint32_t inst_mask)
{
	/* compute (RLC) queue bring-up is not implemented; nothing to do */
	return 0;
}
672 
673 static void sdma_v7_1_inst_free_ucode_buffer(struct amdgpu_device *adev,
674 					     uint32_t inst_mask)
675 {
676 	int i;
677 
678 	for_each_inst(i, inst_mask) {
679 		amdgpu_bo_free_kernel(&adev->sdma.instance[i].sdma_fw_obj,
680 				      &adev->sdma.instance[i].sdma_fw_gpu_addr,
681 				      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
682 	}
683 }
684 
685 /**
686  * sdma_v7_1_inst_load_microcode - load the sDMA ME ucode
687  *
688  * @adev: amdgpu_device pointer
689  * @inst_mask: mask of dma engine instances to be enabled
690  *
691  * Loads the sDMA0/1 ucode.
692  * Returns 0 for success, -EINVAL if the ucode is not available.
693  */
static int sdma_v7_1_inst_load_microcode(struct amdgpu_device *adev,
					 uint32_t inst_mask)
{
	const struct sdma_firmware_header_v3_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	uint32_t tmp, sdma_status, ic_op_cntl;
	int i, r, j;

	/* halt the MEs */
	sdma_v7_1_inst_enable(adev, false, inst_mask);

	if (!adev->sdma.instance[0].fw)
		return -EINVAL;

	/* all instances share the firmware image of instance 0 */
	hdr = (const struct sdma_firmware_header_v3_0 *)
		adev->sdma.instance[0].fw->data;
	amdgpu_ucode_print_sdma_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
			le32_to_cpu(hdr->ucode_offset_bytes));
	fw_size = le32_to_cpu(hdr->ucode_size_bytes);

	for_each_inst(i, inst_mask) {
		/* VRAM BO holding the ucode image for this instance;
		 * freed via sdma_v7_1_inst_free_ucode_buffer()
		 */
		r = amdgpu_bo_create_reserved(adev, fw_size,
					      PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->sdma.instance[i].sdma_fw_obj,
					      &adev->sdma.instance[i].sdma_fw_gpu_addr,
					      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
			return r;
		}

		memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);

		amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
		amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);

		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_CNTL, GPA, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL), tmp);

		/* point the instruction cache at the ucode BO */
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_LO),
			lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_HI),
			upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));

		/* kick off icache priming of the new image */
		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL), tmp);

		/* Wait for sdma ucode init complete */
		for (j = 0; j < adev->usec_timeout; j++) {
			ic_op_cntl = RREG32_SOC15_IP(GC,
					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
			sdma_status = RREG32_SOC15_IP(GC,
					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
			/* done once the icache is primed and ucode reports init done */
			if ((REG_GET_FIELD(ic_op_cntl, SDMA0_SDMA_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
			    (REG_GET_FIELD(sdma_status, SDMA0_SDMA_STATUS_REG, UCODE_INIT_DONE) == 1))
				break;
			udelay(1);
		}

		if (j >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to init sdma ucode\n");
			/* NOTE(review): ucode BOs created so far are not freed
			 * here - presumably the caller cleans up via
			 * sdma_v7_1_inst_free_ucode_buffer(); confirm.
			 */
			return -EINVAL;
		}
	}

	return 0;
}
767 
/*
 * sdma_v7_1_soft_reset - soft reset all SDMA instances via GRBM and restart them.
 * Returns 0 on success or the error from sdma_v7_1_inst_start().
 */
static int sdma_v7_1_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t inst_mask;
	u32 tmp;
	int i;

	/* stop all gfx queues before resetting */
	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
	sdma_v7_1_inst_gfx_stop(adev, inst_mask);

	for_each_inst(i, inst_mask) {
		//tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE));
		//tmp |= SDMA0_SDMA_FREEZE__FREEZE_MASK;
		//WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE), tmp);
		/* halt and reset the instance's MCU */
		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		tmp |= SDMA0_SDMA_MCU_CNTL__HALT_MASK;
		tmp |= SDMA0_SDMA_MCU_CNTL__RESET_MASK;
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), tmp);

		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_PREEMPT), 0);

		udelay(100);

		/* pulse the GRBM soft-reset bit for this SDMA instance;
		 * readbacks after each write presumably post/flush it - confirm
		 */
		tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

		udelay(100);

		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

		udelay(100);
	}

	/* bring all instances back up */
	return sdma_v7_1_inst_start(adev, inst_mask);
}
805 
806 static bool sdma_v7_1_check_soft_reset(struct amdgpu_ip_block *ip_block)
807 {
808 	struct amdgpu_device *adev = ip_block->adev;
809 	struct amdgpu_ring *ring;
810 	int i, r;
811 	long tmo = msecs_to_jiffies(1000);
812 
813 	for (i = 0; i < adev->sdma.num_instances; i++) {
814 		ring = &adev->sdma.instance[i].ring;
815 		r = amdgpu_ring_test_ib(ring, tmo);
816 		if (r)
817 			return true;
818 	}
819 
820 	return false;
821 }
822 
823 static int sdma_v7_1_reset_queue(struct amdgpu_ring *ring,
824 				 unsigned int vmid,
825 				 struct amdgpu_fence *timedout_fence)
826 {
827 	struct amdgpu_device *adev = ring->adev;
828 	int r;
829 
830 	if (ring->me >= adev->sdma.num_instances) {
831 		dev_err(adev->dev, "sdma instance not found\n");
832 		return -EINVAL;
833 	}
834 
835 	amdgpu_ring_reset_helper_begin(ring, timedout_fence);
836 
837 	r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0);
838 	if (r)
839 		return r;
840 
841 	r = sdma_v7_1_gfx_resume_instance(adev, ring->me, true);
842 	if (r)
843 		return r;
844 
845 	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
846 }
847 
848 /**
849  * sdma_v7_1_inst_start - setup and start the async dma engines
850  *
851  * @adev: amdgpu_device pointer
852  * @inst_mask: mask of dma engine instances to be enabled
853  *
854  * Set up the DMA engines and enable them.
855  * Returns 0 for success, error for failure.
856  */
857 static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
858 				uint32_t inst_mask)
859 {
860 	int r = 0;
861 
862 	if (amdgpu_sriov_vf(adev)) {
863 		sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
864 		sdma_v7_1_inst_enable(adev, false, inst_mask);
865 
866 		/* set RB registers */
867 		r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
868 		return r;
869 	}
870 
871 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
872 		r = sdma_v7_1_inst_load_microcode(adev, inst_mask);
873 		if (r) {
874 			sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask);
875 			return r;
876 		}
877 
878 		if (amdgpu_emu_mode == 1)
879 			msleep(1000);
880 	}
881 
882 	/* unhalt the MEs */
883 	sdma_v7_1_inst_enable(adev, true, inst_mask);
884 	/* enable sdma ring preemption */
885 	sdma_v7_1_inst_ctx_switch_enable(adev, true, inst_mask);
886 
887 	/* start the gfx rings and rlc compute queues */
888 	r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
889 	if (r)
890 		return r;
891 	r = sdma_v7_1_inst_rlc_resume(adev, inst_mask);
892 
893 	return r;
894 }
895 
/**
 * sdma_v7_1_mqd_init - initialize an SDMA memory queue descriptor
 *
 * @adev: amdgpu_device pointer
 * @mqd: MQD backing store, interpreted as a struct v12_sdma_mqd
 * @prop: queue properties (ring base/size, wptr/rptr wb, doorbell, CSA)
 *
 * Fill the v12 SDMA MQD with ring buffer, writeback, doorbell and CSA
 * settings from @prop.
 * Returns 0 (never fails).
 */
static int sdma_v7_1_mqd_init(struct amdgpu_device *adev, void *mqd,
			      struct amdgpu_mqd_prop *prop)
{
	struct v12_sdma_mqd *m = mqd;
	uint64_t wb_gpu_addr;

	/* RB size is log2 of the dword count; enable rptr writeback and
	 * MCU wptr polling
	 */
	m->sdmax_rlcx_rb_cntl =
		order_base_2(prop->queue_size / 4) << SDMA0_SDMA_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		4 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;

	/* ring base is stored as a 256-byte-aligned (>> 8) address */
	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);

	wb_gpu_addr = prop->wptr_gpu_addr;
	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);

	wb_gpu_addr = prop->rptr_gpu_addr;
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);

	/* seed IB_CNTL from the current instance-0 hardware value */
	m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, 0,
							regSDMA0_SDMA_QUEUE0_IB_CNTL));

	m->sdmax_rlcx_doorbell_offset =
		prop->doorbell_index << SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);

	m->sdmax_rlcx_doorbell_log = 0;
	m->sdmax_rlcx_rb_aql_cntl = 0x4000;	//regSDMA0_SDMA_QUEUE0_RB_AQL_CNTL_DEFAULT;
	m->sdmax_rlcx_dummy_reg = 0xf;	//regSDMA0_SDMA_QUEUE0_DUMMY_REG_DEFAULT;

	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);

	return 0;
}
936 
937 static void sdma_v7_1_set_mqd_funcs(struct amdgpu_device *adev)
938 {
939 	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
940 	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_1_mqd_init;
941 }
942 
/**
 * sdma_v7_1_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	/* seed the writeback slot with a poison value, then let the
	 * engine overwrite it with 0xDEADBEEF
	 */
	tmp = 0xCAFEDEAD;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_device_wb_free(adev, index);
		return r;
	}

	/* 5-dword WRITE_LINEAR packet: header, addr lo/hi, count, data */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* poll the writeback slot until the engine lands the write */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	amdgpu_device_wb_free(adev, index);

	return r;
}
1004 
/**
 * sdma_v7_1_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring.
 * Returns 0 on success, error on failure.
 */
static int sdma_v7_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	long r;
	u32 tmp = 0;
	u64 gpu_addr;

	/* poison the writeback slot; the IB should replace it with
	 * 0xDEADBEEF once it has executed
	 */
	tmp = 0xCAFEDEAD;
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err0;
	}

	/* WRITE_LINEAR packet followed by NOP padding */
	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}

	/* fence signaled: verify the IB actually landed the write */
	tmp = le32_to_cpu(adev->wb.wb[index]);

	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}
1081 
1082 
1083 /**
1084  * sdma_v7_1_vm_copy_pte - update PTEs by copying them from the GART
1085  *
1086  * @ib: indirect buffer to fill with commands
1087  * @pe: addr of the page entry
1088  * @src: src addr to copy from
1089  * @count: number of page entries to update
1090  *
1091  * Update PTEs by copying them from the GART using sDMA.
1092  */
1093 static void sdma_v7_1_vm_copy_pte(struct amdgpu_ib *ib,
1094 				  uint64_t pe, uint64_t src,
1095 				  unsigned count)
1096 {
1097 	unsigned bytes = count * 8;
1098 
1099 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
1100 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
1101 
1102 	ib->ptr[ib->length_dw++] = bytes - 1;
1103 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1104 	ib->ptr[ib->length_dw++] = lower_32_bits(src);
1105 	ib->ptr[ib->length_dw++] = upper_32_bits(src);
1106 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1107 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1108 
1109 }
1110 
1111 /**
1112  * sdma_v7_1_vm_write_pte - update PTEs by writing them manually
1113  *
1114  * @ib: indirect buffer to fill with commands
1115  * @pe: addr of the page entry
1116  * @value: dst addr to write into pe
1117  * @count: number of page entries to update
1118  * @incr: increase next addr by incr bytes
1119  *
1120  * Update PTEs by writing them manually using sDMA.
1121  */
1122 static void sdma_v7_1_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
1123 				   uint64_t value, unsigned count,
1124 				   uint32_t incr)
1125 {
1126 	unsigned ndw = count * 2;
1127 
1128 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
1129 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
1130 	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
1131 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1132 	ib->ptr[ib->length_dw++] = ndw - 1;
1133 	for (; ndw > 0; ndw -= 2) {
1134 		ib->ptr[ib->length_dw++] = lower_32_bits(value);
1135 		ib->ptr[ib->length_dw++] = upper_32_bits(value);
1136 		value += incr;
1137 	}
1138 }
1139 
1140 /**
1141  * sdma_v7_1_vm_set_pte_pde - update the page tables using sDMA
1142  *
1143  * @ib: indirect buffer to fill with commands
1144  * @pe: addr of the page entry
1145  * @addr: dst addr to write into pe
1146  * @count: number of page entries to update
1147  * @incr: increase next addr by incr bytes
1148  * @flags: access flags
1149  *
1150  * Update the page tables using sDMA.
1151  */
1152 static void sdma_v7_1_vm_set_pte_pde(struct amdgpu_ib *ib,
1153 				     uint64_t pe,
1154 				     uint64_t addr, unsigned count,
1155 				     uint32_t incr, uint64_t flags)
1156 {
1157 	/* for physically contiguous pages (vram) */
1158 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);
1159 	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
1160 	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
1161 	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
1162 	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
1163 	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
1164 	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
1165 	ib->ptr[ib->length_dw++] = incr; /* increment size */
1166 	ib->ptr[ib->length_dw++] = 0;
1167 	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
1168 }
1169 
1170 /**
1171  * sdma_v7_1_ring_pad_ib - pad the IB
1172  *
1173  * @ring: amdgpu ring pointer
1174  * @ib: indirect buffer to fill with padding
1175  *
1176  * Pad the IB with NOPs to a boundary multiple of 8.
1177  */
1178 static void sdma_v7_1_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
1179 {
1180 	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
1181 	u32 pad_count;
1182 	int i;
1183 
1184 	pad_count = (-ib->length_dw) & 0x7;
1185 	for (i = 0; i < pad_count; i++)
1186 		if (sdma && sdma->burst_nop && (i == 0))
1187 			ib->ptr[ib->length_dw++] =
1188 				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
1189 				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
1190 		else
1191 			ib->ptr[ib->length_dw++] =
1192 				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
1193 }
1194 
/**
 * sdma_v7_1_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Emit a POLL_REGMEM packet that waits until the ring's fence memory
 * reaches the latest synced sequence number, i.e. until all previously
 * submitted work has completed.
 */
static void sdma_v7_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}
1219 
/**
 * sdma_v7_1_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA.
 */
static void sdma_v7_1_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	/* delegate to the common GMC helper, which emits via this
	 * ring's emit_wreg/emit_reg_wait callbacks
	 */
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}
1235 
/**
 * sdma_v7_1_ring_emit_wreg - emit a register write on the ring
 *
 * @ring: amdgpu_ring pointer
 * @reg: register dword offset to write
 * @val: value to write
 */
static void sdma_v7_1_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	/* SRBM WRITE command will not support on sdma v7.
	 * Use Register WRITE command instead, which OPCODE is same as SRBM WRITE.
	 * The register offset is converted to a byte address (<< 2).
	 */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE));
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, val);
}
1246 
/**
 * sdma_v7_1_ring_emit_reg_wait - emit a wait for a register value
 *
 * @ring: amdgpu_ring pointer
 * @reg: register dword offset to poll
 * @val: reference value to compare against
 * @mask: bits of the register to compare
 *
 * Emit a POLL_REGMEM packet that waits until (reg & mask) == val.
 */
static void sdma_v7_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}
1260 
/**
 * sdma_v7_1_ring_emit_reg_write_reg_wait - write one register, wait on another
 *
 * @ring: amdgpu_ring pointer
 * @reg0: register dword offset to write
 * @reg1: register dword offset to poll
 * @ref: reference value for the poll
 * @mask: mask for the poll
 */
static void sdma_v7_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	/* wait for a cycle to reset vm_inv_eng*_ack */
	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}
1270 
1271 static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block)
1272 {
1273 	struct amdgpu_device *adev = ip_block->adev;
1274 	int r;
1275 
1276 	r = amdgpu_sdma_init_microcode(adev, 0, true);
1277 	if (r) {
1278 		DRM_ERROR("Failed to init sdma firmware!\n");
1279 		return r;
1280 	}
1281 
1282 	sdma_v7_1_set_ring_funcs(adev);
1283 	sdma_v7_1_set_buffer_funcs(adev);
1284 	sdma_v7_1_set_vm_pte_funcs(adev);
1285 	sdma_v7_1_set_irq_funcs(adev);
1286 	sdma_v7_1_set_mqd_funcs(adev);
1287 
1288 	return 0;
1289 }
1290 
1291 static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
1292 {
1293 	struct amdgpu_ring *ring;
1294 	int r, i;
1295 	struct amdgpu_device *adev = ip_block->adev;
1296 	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
1297 	uint32_t *ptr;
1298 	u32 xcc_id;
1299 
1300 	/* SDMA trap event */
1301 	r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_GFX,
1302 			      GFX_11_0_0__SRCID__SDMA_TRAP,
1303 			      &adev->sdma.trap_irq);
1304 	if (r)
1305 		return r;
1306 
1307 	for (i = 0; i < adev->sdma.num_instances; i++) {
1308 		ring = &adev->sdma.instance[i].ring;
1309 		ring->ring_obj = NULL;
1310 		ring->use_doorbell = true;
1311 		ring->me = i;
1312 		xcc_id = adev->sdma.instance[i].xcc_id;
1313 
1314 		DRM_DEBUG("SDMA%d.%d use_doorbell being set to: [%s]\n",
1315 				xcc_id, i % adev->sdma.num_inst_per_xcc,
1316 				ring->use_doorbell?"true":"false");
1317 
1318 		ring->doorbell_index =
1319 			(adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset
1320 
1321 		ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
1322 		sprintf(ring->name, "sdma%d.%d", xcc_id,
1323 				i % adev->sdma.num_inst_per_xcc);
1324 		r = amdgpu_ring_init(adev, ring, 1024,
1325 				     &adev->sdma.trap_irq,
1326 				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
1327 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
1328 		if (r)
1329 			return r;
1330 	}
1331 
1332 	adev->sdma.supported_reset =
1333 		amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
1334 	if (!amdgpu_sriov_vf(adev) &&
1335 	    !adev->debug_disable_gpu_ring_reset)
1336 		adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
1337 
1338 	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
1339 	if (r)
1340 		return r;
1341 	/* Allocate memory for SDMA IP Dump buffer */
1342 	ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
1343 	if (ptr)
1344 		adev->sdma.ip_dump = ptr;
1345 	else
1346 		DRM_ERROR("Failed to allocated memory for SDMA IP Dump\n");
1347 
1348 #ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
1349 	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
1350 #endif
1351 
1352 	return r;
1353 }
1354 
/**
 * sdma_v7_1_sw_fini - software teardown for SDMA v7.1
 *
 * @ip_block: pointer to the SDMA amdgpu_ip_block
 *
 * Tear down the rings, sysfs entries, firmware context, the direct-load
 * ucode buffer (if used) and the IP dump buffer.
 * Returns 0.
 */
static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t inst_mask;
	int i;

	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	amdgpu_sdma_sysfs_reset_mask_fini(adev);
	amdgpu_sdma_destroy_inst_ctx(adev, true);

	/* the ucode bounce buffer only exists for direct firmware load */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
		sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask);

	kfree(adev->sdma.ip_dump);

	return 0;
}
1376 
1377 static int sdma_v7_1_hw_init(struct amdgpu_ip_block *ip_block)
1378 {
1379 	struct amdgpu_device *adev = ip_block->adev;
1380 	uint32_t inst_mask;
1381 
1382 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
1383 
1384 	return sdma_v7_1_inst_start(adev, inst_mask);
1385 }
1386 
1387 static int sdma_v7_1_hw_fini(struct amdgpu_ip_block *ip_block)
1388 {
1389 	struct amdgpu_device *adev = ip_block->adev;
1390 	uint32_t inst_mask;
1391 
1392 	if (amdgpu_sriov_vf(adev))
1393 		return 0;
1394 
1395 	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
1396 	sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
1397 	sdma_v7_1_inst_enable(adev, false, inst_mask);
1398 
1399 	return 0;
1400 }
1401 
/* Suspend is just a hw teardown for SDMA. */
static int sdma_v7_1_suspend(struct amdgpu_ip_block *ip_block)
{
	return sdma_v7_1_hw_fini(ip_block);
}
1406 
/* Resume is just a hw re-init for SDMA. */
static int sdma_v7_1_resume(struct amdgpu_ip_block *ip_block)
{
	return sdma_v7_1_hw_init(ip_block);
}
1411 
1412 static bool sdma_v7_1_is_idle(struct amdgpu_ip_block *ip_block)
1413 {
1414 	struct amdgpu_device *adev = ip_block->adev;
1415 	u32 i;
1416 
1417 	for (i = 0; i < adev->sdma.num_instances; i++) {
1418 		u32 tmp = RREG32(sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
1419 
1420 		if (!(tmp & SDMA0_SDMA_STATUS_REG__IDLE_MASK))
1421 			return false;
1422 	}
1423 
1424 	return true;
1425 }
1426 
1427 static int sdma_v7_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
1428 {
1429 	unsigned i, j;
1430 	u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
1431 	struct amdgpu_device *adev = ip_block->adev;
1432 
1433 	for (i = 0; i < adev->usec_timeout; i++) {
1434 		for (j = 0; j < adev->sdma.num_instances; j++) {
1435 			sdma[j] = RREG32(sdma_v7_1_get_reg_offset(adev,
1436 						j, regSDMA0_SDMA_STATUS_REG));
1437 			if (!(sdma[j] & SDMA0_SDMA_STATUS_REG__IDLE_MASK))
1438 				break;
1439 		}
1440 		if (j == adev->sdma.num_instances)
1441 			return 0;
1442 		udelay(1);
1443 	}
1444 	return -ETIMEDOUT;
1445 }
1446 
/**
 * sdma_v7_1_ring_preempt_ib - preempt the currently executing IB
 *
 * @ring: amdgpu_ring pointer
 *
 * Emit a trailing fence, assert the hardware preempt request, and poll
 * the trailing fence to confirm the engine actually preempted.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	u32 index = 0;
	u64 sdma_gfx_preempt;

	amdgpu_sdma_get_index_from_ring(ring, &index);
	sdma_gfx_preempt =
		sdma_v7_1_get_reg_offset(adev, index, regSDMA0_SDMA_QUEUE0_PREEMPT);

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* emit the trailing fence */
	ring->trail_seq += 1;
	r = amdgpu_ring_alloc(ring, 10);
	if (r) {
		DRM_ERROR("ring %d failed to be allocated \n", ring->idx);
		return r;
	}
	sdma_v7_1_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
				  ring->trail_seq, 0);
	amdgpu_ring_commit(ring);

	/* assert IB preemption */
	WREG32(sdma_gfx_preempt, 1);

	/* poll the trailing fence: it only signals once the engine has
	 * preempted past the current IB
	 */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
	}

	/* deassert IB preemption */
	WREG32(sdma_gfx_preempt, 0);

	/* deassert the preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}
1495 
1496 static int sdma_v7_1_set_trap_irq_state(struct amdgpu_device *adev,
1497 					struct amdgpu_irq_src *source,
1498 					unsigned type,
1499 					enum amdgpu_interrupt_state state)
1500 {
1501 	u32 sdma_cntl;
1502 
1503 	u32 reg_offset = sdma_v7_1_get_reg_offset(adev, type, regSDMA0_SDMA_CNTL);
1504 
1505 	sdma_cntl = RREG32(reg_offset);
1506 	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_SDMA_CNTL, TRAP_ENABLE,
1507 		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
1508 	WREG32(reg_offset, sdma_cntl);
1509 
1510 	return 0;
1511 }
1512 
1513 static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev,
1514 				      struct amdgpu_irq_src *source,
1515 				      struct amdgpu_iv_entry *entry)
1516 {
1517 	int instances, queue, xcc_id = 0;
1518 	uint32_t mes_queue_id = entry->src_data[0];
1519 
1520 	DRM_DEBUG("IH: SDMA trap\n");
1521 
1522 	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
1523 		struct amdgpu_mes_queue *queue;
1524 
1525 		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
1526 
1527 		spin_lock(&adev->mes.queue_id_lock);
1528 		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
1529 		if (queue) {
1530 			DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
1531 			amdgpu_fence_process(queue->ring);
1532 		}
1533 		spin_unlock(&adev->mes.queue_id_lock);
1534 		return 0;
1535 	}
1536 
1537 	queue = entry->ring_id & 0xf;
1538 	if (adev->gfx.funcs && adev->gfx.funcs->ih_node_to_logical_xcc)
1539 		xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, entry->node_id);
1540 	else
1541 		dev_warn(adev->dev, "IH: SDMA may get wrong xcc id as gfx function not available\n");
1542 	instances = ((entry->ring_id & 0xf0) >> 4) +
1543 		xcc_id * adev->sdma.num_inst_per_xcc;
1544 	if (instances > adev->sdma.num_instances - 1) {
1545 		DRM_ERROR("IH: wrong ring_ID detected, as wrong sdma instance\n");
1546 		return -EINVAL;
1547 	}
1548 
1549 	switch (entry->client_id) {
1550 	case SOC21_IH_CLIENTID_GFX:
1551 		switch (queue) {
1552 		case 0:
1553 			amdgpu_fence_process(&adev->sdma.instance[instances].ring);
1554 			break;
1555 		default:
1556 			break;
1557 		}
1558 		break;
1559 	}
1560 	return 0;
1561 }
1562 
/* Illegal-instruction interrupt handler: intentionally a no-op stub
 * for SDMA v7.1.
 */
static int sdma_v7_1_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	return 0;
}
1569 
/* Clockgating control: not implemented for SDMA v7.1, always succeeds. */
static int sdma_v7_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					   enum amd_clockgating_state state)
{
	return 0;
}
1575 
/* Powergating control: not implemented for SDMA v7.1, always succeeds. */
static int sdma_v7_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
					  enum amd_powergating_state state)
{
	return 0;
}
1581 
/* No clockgating flags to report for SDMA v7.1. */
static void sdma_v7_1_get_clockgating_state(struct amdgpu_ip_block *ip_block,
					    u64 *flags)
{
}
1586 
1587 static void sdma_v7_1_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
1588 {
1589 	struct amdgpu_device *adev = ip_block->adev;
1590 	int i, j;
1591 	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
1592 	uint32_t instance_offset;
1593 
1594 	if (!adev->sdma.ip_dump)
1595 		return;
1596 
1597 	drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
1598 	for (i = 0; i < adev->sdma.num_instances; i++) {
1599 		instance_offset = i * reg_count;
1600 		drm_printf(p, "\nInstance:%d\n", i);
1601 
1602 		for (j = 0; j < reg_count; j++)
1603 			drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_1[j].reg_name,
1604 				   adev->sdma.ip_dump[instance_offset + j]);
1605 	}
1606 }
1607 
1608 static void sdma_v7_1_dump_ip_state(struct amdgpu_ip_block *ip_block)
1609 {
1610 	struct amdgpu_device *adev = ip_block->adev;
1611 	int i, j;
1612 	uint32_t instance_offset;
1613 	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
1614 
1615 	if (!adev->sdma.ip_dump)
1616 		return;
1617 
1618 	amdgpu_gfx_off_ctrl(adev, false);
1619 	for (i = 0; i < adev->sdma.num_instances; i++) {
1620 		instance_offset = i * reg_count;
1621 		for (j = 0; j < reg_count; j++)
1622 			adev->sdma.ip_dump[instance_offset + j] =
1623 				RREG32(sdma_v7_1_get_reg_offset(adev, i,
1624 				       sdma_reg_list_7_1[j].reg_offset));
1625 	}
1626 	amdgpu_gfx_off_ctrl(adev, true);
1627 }
1628 
/* IP-block level callbacks for SDMA v7.1. */
const struct amd_ip_funcs sdma_v7_1_ip_funcs = {
	.name = "sdma_v7_1",
	.early_init = sdma_v7_1_early_init,
	.late_init = NULL,
	.sw_init = sdma_v7_1_sw_init,
	.sw_fini = sdma_v7_1_sw_fini,
	.hw_init = sdma_v7_1_hw_init,
	.hw_fini = sdma_v7_1_hw_fini,
	.suspend = sdma_v7_1_suspend,
	.resume = sdma_v7_1_resume,
	.is_idle = sdma_v7_1_is_idle,
	.wait_for_idle = sdma_v7_1_wait_for_idle,
	.soft_reset = sdma_v7_1_soft_reset,
	.check_soft_reset = sdma_v7_1_check_soft_reset,
	.set_clockgating_state = sdma_v7_1_set_clockgating_state,
	.set_powergating_state = sdma_v7_1_set_powergating_state,
	.get_clockgating_state = sdma_v7_1_get_clockgating_state,
	.dump_ip_state = sdma_v7_1_dump_ip_state,
	.print_ip_state = sdma_v7_1_print_ip_state,
};
1649 
/* Ring callbacks for the SDMA v7.1 gfx queues. The emit_frame_size and
 * emit_ib_size dword counts must stay in sync with the corresponding
 * emit functions above.
 */
static const struct amdgpu_ring_funcs sdma_v7_1_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.get_rptr = sdma_v7_1_ring_get_rptr,
	.get_wptr = sdma_v7_1_ring_get_wptr,
	.set_wptr = sdma_v7_1_ring_set_wptr,
	.emit_frame_size =
		5 + /* sdma_v7_1_ring_init_cond_exec */
		6 + /* sdma_v7_1_ring_emit_hdp_flush */
		6 + /* sdma_v7_1_ring_emit_pipeline_sync */
		/* sdma_v7_1_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
		10 + 10 + 10, /* sdma_v7_1_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 5 + 7 + 6, /* sdma_v7_1_ring_emit_ib */
	.emit_ib = sdma_v7_1_ring_emit_ib,
	.emit_mem_sync = sdma_v7_1_ring_emit_mem_sync,
	.emit_fence = sdma_v7_1_ring_emit_fence,
	.emit_pipeline_sync = sdma_v7_1_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v7_1_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v7_1_ring_emit_hdp_flush,
	.test_ring = sdma_v7_1_ring_test_ring,
	.test_ib = sdma_v7_1_ring_test_ib,
	.insert_nop = sdma_v7_1_ring_insert_nop,
	.pad_ib = sdma_v7_1_ring_pad_ib,
	.emit_wreg = sdma_v7_1_ring_emit_wreg,
	.emit_reg_wait = sdma_v7_1_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = sdma_v7_1_ring_emit_reg_write_reg_wait,
	.init_cond_exec = sdma_v7_1_ring_init_cond_exec,
	.preempt_ib = sdma_v7_1_ring_preempt_ib,
	.reset = sdma_v7_1_reset_queue,
};
1685 
1686 static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev)
1687 {
1688 	int i, dev_inst;
1689 
1690 	for (i = 0; i < adev->sdma.num_instances; i++) {
1691 		adev->sdma.instance[i].ring.funcs = &sdma_v7_1_ring_funcs;
1692 		adev->sdma.instance[i].ring.me = i;
1693 
1694 		dev_inst = GET_INST(SDMA0, i);
1695 		/* XCC to which SDMA belongs depends on physical instance */
1696 		adev->sdma.instance[i].xcc_id =
1697 			dev_inst / adev->sdma.num_inst_per_xcc;
1698 	}
1699 }
1700 
/* Trap interrupt: state control plus the fence-processing handler. */
static const struct amdgpu_irq_src_funcs sdma_v7_1_trap_irq_funcs = {
	.set = sdma_v7_1_set_trap_irq_state,
	.process = sdma_v7_1_process_trap_irq,
};
1705 
/* Illegal-instruction interrupt: handler only (stub), no state control. */
static const struct amdgpu_irq_src_funcs sdma_v7_1_illegal_inst_irq_funcs = {
	.process = sdma_v7_1_process_illegal_inst_irq,
};
1709 
/* Register the SDMA interrupt sources: one trap irq type per instance
 * plus the illegal-instruction source.
 */
static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
					adev->sdma.num_instances;
	adev->sdma.trap_irq.funcs = &sdma_v7_1_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v7_1_illegal_inst_irq_funcs;
}
1717 
1718 /**
1719  * sdma_v7_1_emit_copy_buffer - copy buffer using the sDMA engine
1720  *
1721  * @ib: indirect buffer to fill with commands
1722  * @src_offset: src GPU address
1723  * @dst_offset: dst GPU address
1724  * @byte_count: number of bytes to xfer
1725  * @copy_flags: copy flags for the buffers
1726  *
1727  * Copy GPU buffers using the DMA engine.
1728  * Used by the amdgpu ttm implementation to move pages if
1729  * registered as the asic copy callback.
1730  */
1731 static void sdma_v7_1_emit_copy_buffer(struct amdgpu_ib *ib,
1732 				       uint64_t src_offset,
1733 				       uint64_t dst_offset,
1734 				       uint32_t byte_count,
1735 				       uint32_t copy_flags)
1736 {
1737 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
1738 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
1739 		SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);
1740 
1741 	ib->ptr[ib->length_dw++] = byte_count - 1;
1742 	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
1743 	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
1744 	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
1745 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1746 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1747 }
1748 
1749 /**
1750  * sdma_v7_1_emit_fill_buffer - fill buffer using the sDMA engine
1751  *
1752  * @ib: indirect buffer to fill
1753  * @src_data: value to write to buffer
1754  * @dst_offset: dst GPU address
1755  * @byte_count: number of bytes to xfer
1756  *
1757  * Fill GPU buffers using the DMA engine.
1758  */
1759 static void sdma_v7_1_emit_fill_buffer(struct amdgpu_ib *ib,
1760 				       uint32_t src_data,
1761 				       uint64_t dst_offset,
1762 				       uint32_t byte_count)
1763 {
1764 	ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL);
1765 	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
1766 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
1767 	ib->ptr[ib->length_dw++] = src_data;
1768 	ib->ptr[ib->length_dw++] = byte_count - 1;
1769 }
1770 
/* TTM buffer move/fill backend for SDMA v7.1.
 * copy/fill_max_bytes = 0x400000 (4 MiB) per packet.
 * NOTE(review): sdma_v7_1_emit_copy_buffer writes 7 dwords but copy_num_dw
 * is 8 — presumably one dword of headroom (e.g. for alignment/padding);
 * confirm against the ring submission path.
 */
static const struct amdgpu_buffer_funcs sdma_v7_1_buffer_funcs = {
	.copy_max_bytes = 0x400000,
	.copy_num_dw = 8,
	.emit_copy_buffer = sdma_v7_1_emit_copy_buffer,
	.fill_max_bytes = 0x400000,
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v7_1_emit_fill_buffer,
};
1779 
1780 static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev)
1781 {
1782 	adev->mman.buffer_funcs = &sdma_v7_1_buffer_funcs;
1783 	adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
1784 }
1785 
/* GPUVM page-table update backend: copy/write/set-PDE helpers emitted as
 * SDMA packets; copy_pte_num_dw is the dword cost of one copy_pte packet.
 */
static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = {
	.copy_pte_num_dw = 8,
	.copy_pte = sdma_v7_1_vm_copy_pte,
	.write_pte = sdma_v7_1_vm_write_pte,
	.set_pte_pde = sdma_v7_1_vm_set_pte_pde,
};
1792 
1793 static void sdma_v7_1_set_vm_pte_funcs(struct amdgpu_device *adev)
1794 {
1795 	unsigned i;
1796 
1797 	adev->vm_manager.vm_pte_funcs = &sdma_v7_1_vm_pte_funcs;
1798 	for (i = 0; i < adev->sdma.num_instances; i++) {
1799 		adev->vm_manager.vm_pte_scheds[i] =
1800 			&adev->sdma.instance[i].ring.sched;
1801 	}
1802 	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
1803 }
1804 
/* IP block descriptor for SDMA v7.1.0; non-static so the SoC discovery
 * code can reference it when assembling the device's IP block list.
 */
const struct amdgpu_ip_block_version sdma_v7_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 7,
	.minor = 1,
	.rev = 0,
	.funcs = &sdma_v7_1_ip_funcs,
};
1812 
/**
 * sdma_v7_1_xcp_resume - restart the SDMA instances of an XCP partition
 *
 * @handle: opaque pointer, cast to struct amdgpu_device
 * @inst_mask: bitmask of SDMA instances to start
 *
 * Returns the result of sdma_v7_1_inst_start(): 0 on success, negative
 * error code on failure.
 */
static int sdma_v7_1_xcp_resume(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* The result was previously staged in a redundant local; return
	 * the start result directly.
	 */
	return sdma_v7_1_inst_start(adev, inst_mask);
}
1822 
/**
 * sdma_v7_1_xcp_suspend - stop the SDMA instances of an XCP partition
 *
 * @handle: opaque pointer, cast to struct amdgpu_device
 * @inst_mask: bitmask of SDMA instances to stop
 *
 * Disables context switching on the selected instances before halting
 * the engines themselves; keep that ordering.  Always returns 0.
 */
static int sdma_v7_1_xcp_suspend(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
	sdma_v7_1_inst_enable(adev, false, inst_mask);

	return 0;
}
1832 
/* Per-partition (XCP) suspend/resume hooks for SDMA.  Non-static —
 * presumably declared in sdma_v7_1.h and consumed by the XCP manager;
 * the symbol name and type must stay as-is.
 */
struct amdgpu_xcp_ip_funcs sdma_v7_1_xcp_funcs = {
	.suspend = &sdma_v7_1_xcp_suspend,
	.resume = &sdma_v7_1_xcp_resume
};
1837