/*
 * Copyright 2025 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"

#include "gc/gc_12_1_0_offset.h"
#include "gc/gc_12_1_0_sh_mask.h"
#include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h"

#include "soc15_common.h"
#include "soc15.h"
#include "sdma_v7_1_0_pkt_open.h"
#include "nbio_v4_3.h"
#include "sdma_common.h"
#include "sdma_v7_1.h"
#include "v12_structs.h"
#include "mes_userqueue.h"
#include "soc_v1_0.h"

MODULE_FIRMWARE("amdgpu/sdma_7_1_0.bin");

#define SDMA1_REG_OFFSET 0x600
#define SDMA0_SDMA_IDX_0_END 0x450
#define SDMA1_HYP_DEC_REG_OFFSET 0x30

static const struct amdgpu_hwip_reg_entry sdma_reg_list_7_1[] = {
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS1_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS2_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS3_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS4_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS5_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_STATUS6_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UCODE_REV),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_RB_RPTR_FETCH),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_RD_XNACK1),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_UTCL1_WR_XNACK1),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE0_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE_STATUS0),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE1_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_RPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_RB_WPTR_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_OFFSET),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_LO),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_BASE_HI),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_RPTR),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_IB_SUB_REMAIN),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_QUEUE2_DUMMY_REG),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_INT_STATUS),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_VM_CNTL),
	SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
	SOC15_REG_ENTRY_STR(GC, 0, regSDMA0_SDMA_CHICKEN_BITS),
};

static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
				uint32_t inst_mask);

static u32 sdma_v7_1_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
	u32 base;
	u32 dev_inst = GET_INST(SDMA0, instance);
	int xcc_id = adev->sdma.instance[instance].xcc_id;
	int xcc_inst = dev_inst % adev->sdma.num_inst_per_xcc;

	if (internal_offset >= SDMA0_SDMA_IDX_0_END) {
		base = adev->reg_offset[GC_HWIP][xcc_id][1];
		if (xcc_inst != 0)
			internal_offset += SDMA1_HYP_DEC_REG_OFFSET * xcc_inst;
	} else {
		base = adev->reg_offset[GC_HWIP][xcc_id][0];
		if (xcc_inst != 0)
			internal_offset += SDMA1_REG_OFFSET * xcc_inst;
	}

	return base + internal_offset;
}
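
/*
 * Illustrative example (hypothetical layout, assuming logical instance 1
 * maps to device instance 1 and two SDMA instances per XCC): xcc_inst == 1,
 * so a regular register (internal_offset < SDMA0_SDMA_IDX_0_END) resolves to
 * reg_offset[GC_HWIP][xcc_id][0] + internal_offset + SDMA1_REG_OFFSET, while
 * a HYP_DEC register (internal_offset >= SDMA0_SDMA_IDX_0_END) resolves to
 * reg_offset[GC_HWIP][xcc_id][1] + internal_offset + SDMA1_HYP_DEC_REG_OFFSET.
 */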

static unsigned sdma_v7_1_ring_init_cond_exec(struct amdgpu_ring *ring,
					      uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, 1);
	/* this is the offset we need to patch later */
	ret = ring->wptr & ring->buf_mask;
	/* insert dummy here and patch it later */
	amdgpu_ring_write(ring, 0);

	return ret;
}
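
/*
 * Note: the dummy dword written above is expected to be overwritten by the
 * caller once the real value is known; the returned ring offset identifies
 * that dword within the ring buffer.
 */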

/**
 * sdma_v7_1_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware.
 */
static uint64_t sdma_v7_1_ring_get_rptr(struct amdgpu_ring *ring)
{
	u64 *rptr;

	/* XXX check if swapping is necessary on BE */
	rptr = (u64 *)ring->rptr_cpu_addr;

	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
	return ((*rptr) >> 2);
}

/**
 * sdma_v7_1_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware.
 */
static uint64_t sdma_v7_1_ring_get_wptr(struct amdgpu_ring *ring)
{
	u64 wptr = 0;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
	}

	return wptr >> 2;
}
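
/*
 * The hardware and the writeback buffers track the ring pointers as byte
 * offsets, while the ring helpers work in dwords, hence the >> 2 on the
 * reads above and the << 2 when the wptr is committed below.
 */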

/**
 * sdma_v7_1_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware.
 */
static void sdma_v7_1_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	DRM_DEBUG("Setting write pointer\n");

	if (ring->use_doorbell) {
		DRM_DEBUG("Using doorbell -- "
			  "wptr_offs == 0x%08x "
			  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
			  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
			  ring->wptr_offs,
			  lower_32_bits(ring->wptr << 2),
			  upper_32_bits(ring->wptr << 2));
		/* XXX check if swapping is necessary on BE */
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr << 2);
		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
			  ring->doorbell_index, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		DRM_DEBUG("Not using doorbell -- "
			  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
			  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
			  ring->me,
			  lower_32_bits(ring->wptr << 2),
			  ring->me,
			  upper_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
				ring->me,
				regSDMA0_SDMA_QUEUE0_RB_WPTR),
				lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev,
				ring->me,
				regSDMA0_SDMA_QUEUE0_RB_WPTR_HI),
				upper_32_bits(ring->wptr << 2));
	}
}

static void sdma_v7_1_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->funcs->nop |
					  SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->funcs->nop);
}

/**
 * sdma_v7_1_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring.
 */
static void sdma_v7_1_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_job *job,
				   struct amdgpu_ib *ib,
				   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);

	/* An IB packet must end on an 8-DW boundary -- the next dword
	 * must be on an 8-dword boundary. Our IB packet below is 6
	 * dwords long, so add x NOPs such that, in modular arithmetic,
	 * wptr + 6 + x = 8k, k >= 0, which in C is
	 * (wptr + 6 + x) % 8 = 0.
	 * The expression below is a solution for x.
	 */
	sdma_v7_1_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
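	/*
	 * Worked example: if wptr % 8 == 5, then x = (2 - 5) & 7 = 5, and
	 * 5 + 5 + 6 = 16, a multiple of 8, so the IB packet below ends on
	 * an 8-DW boundary.
	 */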

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr));
	amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr));
}

/**
 * sdma_v7_1_ring_emit_mem_sync - flush the IB by graphics cache rinse
 *
 * @ring: amdgpu ring pointer
 *
 * flush the IB by graphics cache rinse.
 */
static void sdma_v7_1_ring_emit_mem_sync(struct amdgpu_ring *ring)
{
	uint32_t gcr_cntl = SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
			    SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
			    SDMA_GCR_GLI_INV(1);

	/* flush the entire L0/L1/L2 cache; this can be narrowed if performance requires */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_GCR_REQ));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_56_32(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_0(gcr_cntl) |
			  SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_15_7(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_16(0));
	amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD5_LIMIT_VA_56_48(0) |
			  SDMA_PKT_GCR_REQ_PAYLOAD5_VMID(0));
}


/**
 * sdma_v7_1_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address
 * @seq: fence seq number
 * @flags: fence flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed.
 */
static void sdma_v7_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
			  SDMA_PKT_FENCE_HEADER_MTYPE(0x3)); /* Uncached (UC) */
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_FENCE) |
				  SDMA_PKT_FENCE_HEADER_MTYPE(0x3));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* generate an interrupt */
		amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_TRAP));
		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
	}
}

/**
 * sdma_v7_1_inst_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the gfx async dma ring buffers.
 */
static void sdma_v7_1_inst_gfx_stop(struct amdgpu_device *adev,
				    uint32_t inst_mask)
{
	u32 rb_cntl, ib_cntl;
	int i;

	for_each_inst(i, inst_mask) {
		rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);
		ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
	}
}

/**
 * sdma_v7_1_inst_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the compute async dma queues.
 */
static void sdma_v7_1_inst_rlc_stop(struct amdgpu_device *adev,
				    uint32_t inst_mask)
{
	/* XXX todo */
}

/**
 * sdma_v7_1_inst_ctx_switch_enable - enable/disable the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Halt or unhalt the async dma engines context switch.
 */
static void sdma_v7_1_inst_ctx_switch_enable(struct amdgpu_device *adev,
					     bool enable, uint32_t inst_mask)
{
	int i;

	for_each_inst(i, inst_mask) {
		WREG32_SOC15_IP(GC,
			sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_TIMEOUT), 0x80);
	}
}

/**
 * sdma_v7_1_inst_enable - halt or unhalt the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Halt or unhalt the async dma engines.
 */
static void sdma_v7_1_inst_enable(struct amdgpu_device *adev,
				  bool enable, uint32_t inst_mask)
{
	u32 mcu_cntl;
	int i;

	if (!enable) {
		sdma_v7_1_inst_gfx_stop(adev, inst_mask);
		sdma_v7_1_inst_rlc_stop(adev, inst_mask);
	}

	if (amdgpu_sriov_vf(adev))
		return;

	for_each_inst(i, inst_mask) {
		mcu_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		mcu_cntl = REG_SET_FIELD(mcu_cntl, SDMA0_SDMA_MCU_CNTL, HALT, enable ? 0 : 1);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), mcu_cntl);
	}
}

/**
 * sdma_v7_1_gfx_resume_instance - start/restart a certain sdma engine
 *
 * @adev: amdgpu_device pointer
 * @i: instance
 * @restore: whether to restore the wptr on restart
 *
 * Set up the gfx DMA ring buffers and enable them. On restart, the wptr and rptr are restored.
 * Return 0 for success.
 */
static int sdma_v7_1_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 doorbell;
	u32 doorbell_offset;
	u32 temp;
	u64 wptr_gpu_addr;
	int r;

	ring = &adev->sdma.instance[i].ring;

	/* Set ring buffer size in dwords */
	rb_bufsz = order_base_2(ring->ring_size / 4);
	rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL));
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1);
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL,
				RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_PRIV, 1);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);

	/* Initialize the ring buffer's read and write pointers */
	if (restore) {
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
	} else {
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_HI), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), 0);
	}
	/* setup the wptr shadow polling */
	wptr_gpu_addr = ring->wptr_gpu_addr;
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_LO),
			lower_32_bits(wptr_gpu_addr));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_POLL_ADDR_HI),
			upper_32_bits(wptr_gpu_addr));

	/* set the wb address whether it's enabled or not */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_HI),
			upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_RPTR_ADDR_LO),
			lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
	if (amdgpu_sriov_vf(adev))
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1);
	else
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1);

	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE), ring->gpu_addr >> 8);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40);

	if (!restore)
		ring->wptr = 0;

	/* before programming wptr to a lower value, need to set minor_ptr_update first */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 1);

	if (!amdgpu_sriov_vf(adev)) { /* only bare-metal uses register writes for the wptr */
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
	}

	doorbell = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL));
	doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET));

	if (ring->use_doorbell) {
		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);
		doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET,
						OFFSET, ring->doorbell_index);
	} else {
		doorbell = REG_SET_FIELD(doorbell, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 0);
	}
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL), doorbell);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_DOORBELL_OFFSET), doorbell_offset);

	if (i == 0)
		adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
						      ring->doorbell_index,
						      adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances);

	if (amdgpu_sriov_vf(adev))
		sdma_v7_1_ring_set_wptr(ring);

	/* set minor_ptr_update to 0 after wptr programmed */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_MINOR_PTR_UPDATE), 0);

	/* Set up the sdma hang watchdog */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL));
	/* 100ms per unit */
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_WATCHDOG_CNTL, QUEUE_HANG_COUNT,
			     max(adev->usec_timeout/100000, 1));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_WATCHDOG_CNTL), temp);
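	/*
	 * Example: a hypothetical usec_timeout of 1000000 us (1 s) would
	 * program a hang count of 10 units of 100 ms each; the max() keeps
	 * at least one unit for short timeouts.
	 */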

	/* Set up RESP_MODE to non-copy addresses */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL));
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, RESP_MODE, 3);
	temp = REG_SET_FIELD(temp, SDMA0_SDMA_UTCL1_CNTL, REDO_DELAY, 9);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_CNTL), temp);

	/* program default cache read and write policy */
	temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE));
	/* clear the read policy and write policy bits */
	temp &= 0xFF0FFF;
	temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
		 (CACHE_WRITE_POLICY_L2__DEFAULT << 14));
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_UTCL1_PAGE), temp);

	if (!amdgpu_sriov_vf(adev)) {
		/* unhalt engine */
		temp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, HALT, 0);
		temp = REG_SET_FIELD(temp, SDMA0_SDMA_MCU_CNTL, RESET, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), temp);
	}

	/* enable DMA RB */
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_SDMA_QUEUE0_RB_CNTL, RB_ENABLE, 1);
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_RB_CNTL), rb_cntl);

	ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL));
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_SDMA_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
	/* enable DMA IBs */
	WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_IB_CNTL), ib_cntl);
	ring->sched.ready = true;

	if (amdgpu_sriov_vf(adev)) { /* the bare-metal sequence doesn't need the two lines below */
		sdma_v7_1_inst_ctx_switch_enable(adev, true, i);
		sdma_v7_1_inst_enable(adev, true, i);
	}

	r = amdgpu_ring_test_helper(ring);
	if (r)
		ring->sched.ready = false;

	return r;
}

/**
 * sdma_v7_1_inst_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the gfx DMA ring buffers and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_inst_gfx_resume(struct amdgpu_device *adev,
				     uint32_t inst_mask)
{
	int i, r;

	for_each_inst(i, inst_mask) {
		r = sdma_v7_1_gfx_resume_instance(adev, i, false);
		if (r)
			return r;
	}

	return 0;
}

/**
 * sdma_v7_1_inst_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the compute DMA queues and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_inst_rlc_resume(struct amdgpu_device *adev,
				     uint32_t inst_mask)
{
	return 0;
}

static void sdma_v7_1_inst_free_ucode_buffer(struct amdgpu_device *adev,
					     uint32_t inst_mask)
{
	int i;

	for_each_inst(i, inst_mask) {
		amdgpu_bo_free_kernel(&adev->sdma.instance[i].sdma_fw_obj,
				      &adev->sdma.instance[i].sdma_fw_gpu_addr,
				      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
	}
}

/**
 * sdma_v7_1_inst_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v7_1_inst_load_microcode(struct amdgpu_device *adev,
					 uint32_t inst_mask)
{
	const struct sdma_firmware_header_v3_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	uint32_t tmp, sdma_status, ic_op_cntl;
	int i, r, j;

	/* halt the MEs */
	sdma_v7_1_inst_enable(adev, false, inst_mask);

	if (!adev->sdma.instance[0].fw)
		return -EINVAL;

	hdr = (const struct sdma_firmware_header_v3_0 *)
		adev->sdma.instance[0].fw->data;
	amdgpu_ucode_print_sdma_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->sdma.instance[0].fw->data +
			le32_to_cpu(hdr->ucode_offset_bytes));
	fw_size = le32_to_cpu(hdr->ucode_size_bytes);

	for_each_inst(i, inst_mask) {
		r = amdgpu_bo_create_reserved(adev, fw_size,
					      PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->sdma.instance[i].sdma_fw_obj,
					      &adev->sdma.instance[i].sdma_fw_gpu_addr,
					      (void **)&adev->sdma.instance[i].sdma_fw_ptr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to create sdma ucode bo\n", r);
			return r;
		}

		memcpy(adev->sdma.instance[i].sdma_fw_ptr, fw_data, fw_size);

		amdgpu_bo_kunmap(adev->sdma.instance[i].sdma_fw_obj);
		amdgpu_bo_unreserve(adev->sdma.instance[i].sdma_fw_obj);

		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_CNTL, GPA, 0);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_CNTL), tmp);

		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_LO),
				lower_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_BASE_HI),
				upper_32_bits(adev->sdma.instance[i].sdma_fw_gpu_addr));

		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
		tmp = REG_SET_FIELD(tmp, SDMA0_SDMA_IC_OP_CNTL, PRIME_ICACHE, 1);
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL), tmp);

		/* Wait for the sdma ucode init to complete */
		for (j = 0; j < adev->usec_timeout; j++) {
			ic_op_cntl = RREG32_SOC15_IP(GC,
					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_IC_OP_CNTL));
			sdma_status = RREG32_SOC15_IP(GC,
					sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));
			if ((REG_GET_FIELD(ic_op_cntl, SDMA0_SDMA_IC_OP_CNTL, ICACHE_PRIMED) == 1) &&
			    (REG_GET_FIELD(sdma_status, SDMA0_SDMA_STATUS_REG, UCODE_INIT_DONE) == 1))
				break;
			udelay(1);
		}

		if (j >= adev->usec_timeout) {
			dev_err(adev->dev, "failed to init sdma ucode\n");
			return -EINVAL;
		}
	}

	return 0;
}

static int sdma_v7_1_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t inst_mask;
	u32 tmp;
	int i;

	inst_mask = GENMASK(NUM_XCC(adev->sdma.sdma_mask) - 1, 0);
	sdma_v7_1_inst_gfx_stop(adev, inst_mask);

	for_each_inst(i, inst_mask) {
		//tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE));
		//tmp |= SDMA0_SDMA_FREEZE__FREEZE_MASK;
		//WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_FREEZE), tmp);
		tmp = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL));
		tmp |= SDMA0_SDMA_MCU_CNTL__HALT_MASK;
		tmp |= SDMA0_SDMA_MCU_CNTL__RESET_MASK;
		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_MCU_CNTL), tmp);

		WREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_QUEUE0_PREEMPT), 0);

		udelay(100);

		tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

		udelay(100);

		WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
		tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);

		udelay(100);
	}

	return sdma_v7_1_inst_start(adev, inst_mask);
}

static bool sdma_v7_1_check_soft_reset(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	struct amdgpu_ring *ring;
	int i, r;
	long tmo = msecs_to_jiffies(1000);

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		r = amdgpu_ring_test_ib(ring, tmo);
		if (r)
			return true;
	}

	return false;
}

static int sdma_v7_1_reset_queue(struct amdgpu_ring *ring,
				 unsigned int vmid,
				 struct amdgpu_fence *timedout_fence)
{
	struct amdgpu_device *adev = ring->adev;
	int r;

	if (ring->me >= adev->sdma.num_instances) {
		dev_err(adev->dev, "sdma instance not found\n");
		return -EINVAL;
	}

	amdgpu_ring_reset_helper_begin(ring, timedout_fence);

	r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true, 0);
	if (r)
		return r;

	r = sdma_v7_1_gfx_resume_instance(adev, ring->me, true);
	if (r)
		return r;

	return amdgpu_ring_reset_helper_end(ring, timedout_fence);
}

/**
 * sdma_v7_1_inst_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the DMA engines and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_inst_start(struct amdgpu_device *adev,
				uint32_t inst_mask)
{
	int r = 0;

	if (amdgpu_sriov_vf(adev)) {
		sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
		sdma_v7_1_inst_enable(adev, false, inst_mask);

		/* set RB registers */
		r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
		return r;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
		r = sdma_v7_1_inst_load_microcode(adev, inst_mask);
		if (r) {
			sdma_v7_1_inst_free_ucode_buffer(adev, inst_mask);
			return r;
		}

		if (amdgpu_emu_mode == 1)
			msleep(1000);
	}

	/* unhalt the MEs */
	sdma_v7_1_inst_enable(adev, true, inst_mask);
	/* enable sdma ring preemption */
	sdma_v7_1_inst_ctx_switch_enable(adev, true, inst_mask);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v7_1_inst_gfx_resume(adev, inst_mask);
	if (r)
		return r;
	r = sdma_v7_1_inst_rlc_resume(adev, inst_mask);

	return r;
}

static int sdma_v7_1_mqd_init(struct amdgpu_device *adev, void *mqd,
			      struct amdgpu_mqd_prop *prop)
{
	struct v12_sdma_mqd *m = mqd;
	uint64_t wb_gpu_addr;

	m->sdmax_rlcx_rb_cntl =
		order_base_2(prop->queue_size / 4) << SDMA0_SDMA_QUEUE0_RB_CNTL__RB_SIZE__SHIFT |
		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
		4 << SDMA0_SDMA_QUEUE0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
		1 << SDMA0_SDMA_QUEUE0_RB_CNTL__MCU_WPTR_POLL_ENABLE__SHIFT;

	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);

	wb_gpu_addr = prop->wptr_gpu_addr;
	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);

	wb_gpu_addr = prop->rptr_gpu_addr;
	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);

	m->sdmax_rlcx_ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_1_get_reg_offset(adev, 0,
						regSDMA0_SDMA_QUEUE0_IB_CNTL));

	m->sdmax_rlcx_doorbell_offset =
		prop->doorbell_index << SDMA0_SDMA_QUEUE0_DOORBELL_OFFSET__OFFSET__SHIFT;

	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_SDMA_QUEUE0_DOORBELL, ENABLE, 1);

	m->sdmax_rlcx_doorbell_log = 0;
	m->sdmax_rlcx_rb_aql_cntl = 0x4000; //regSDMA0_SDMA_QUEUE0_RB_AQL_CNTL_DEFAULT;
	m->sdmax_rlcx_dummy_reg = 0xf; //regSDMA0_SDMA_QUEUE0_DUMMY_REG_DEFAULT;

	m->sdmax_rlcx_csa_addr_lo = lower_32_bits(prop->csa_addr);
	m->sdmax_rlcx_csa_addr_hi = upper_32_bits(prop->csa_addr);

	return 0;
}

static void sdma_v7_1_set_mqd_funcs(struct amdgpu_device *adev)
{
	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v12_sdma_mqd);
	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v7_1_mqd_init;
}

/**
 * sdma_v7_1_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory.
 * Returns 0 for success, error for failure.
 */
static int sdma_v7_1_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	tmp = 0xCAFEDEAD;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_device_wb_free(adev, index);
		return r;
	}

	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		if (amdgpu_emu_mode == 1)
			msleep(1);
		else
			udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	amdgpu_device_wb_free(adev, index);

	return r;
}

/**
 * sdma_v7_1_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring.
 * Returns 0 on success, error on failure.
 */
static int sdma_v7_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	long r;
	u32 tmp = 0;
	u64 gpu_addr;

	tmp = 0xCAFEDEAD;
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err0;
	}

	ib.ptr[0] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out\n");
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err1;
	}

	tmp = le32_to_cpu(adev->wb.wb[index]);

	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}

/**
 * sdma_v7_1_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA.
 */
static void sdma_v7_1_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
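	/* each page-table entry is 8 bytes, so the copy moves count * 8 bytes */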
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = bytes - 1;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(src);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}

/**
 * sdma_v7_1_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using sDMA.
 */
static void sdma_v7_1_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				   uint64_t value, unsigned count,
				   uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = ndw - 1;
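	/*
	 * e.g. count == 2 gives ndw == 4: header, pe_lo, pe_hi, 3 (ndw - 1),
	 * then two 64-bit values, each advanced by incr bytes.
	 */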
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}

/**
 * sdma_v7_1_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA.
 */
static void sdma_v7_1_vm_set_pte_pde(struct amdgpu_ib *ib,
				     uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint64_t flags)
{
	/* for physically contiguous pages (vram) */
	u32 header = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_PTEPDE);

	/* TODO:
	 * When VM_L2_CNTL5.WALKER_FETCH_PDE_MTYPE_ENABLE is enabled, change the MTYPE below
	 * to RW for AID A1 and UC for AID A0. NC needs an additional GCR flush and need not
	 * be supported. Also, honour the amdgpu_mtype_local override. RW would additionally
	 * require setting SCOPE bits in the header.
	 *
	 * header |= (SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x2:RW) |
	 *	      SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1) |
	 *	      SDMA_PKT_PTEPDE_COPY_HEADER_SCOPE(0x3:SYS_SCOPE));
	 */

	/* VM_L2_CNTL5.WALKER_FETCH_PDE_MTYPE_ENABLE is 0, which defaults to UC, so
	 * use MTYPE_UC (0x3). For reference: MTYPE_RW=0x2, MTYPE_NC=0x0.
	 */
	header |= SDMA_PKT_PTEPDE_COPY_HEADER_MTYPE(0x3) | SDMA_PKT_PTEPDE_COPY_HEADER_SNOOP(0x1);

	ib->ptr[ib->length_dw++] = header;
	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = incr; /* increment size */
	ib->ptr[ib->length_dw++] = 0;
	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
}

/**
 * sdma_v7_1_ring_pad_ib - pad the IB
 *
 * @ring: amdgpu ring pointer
 * @ib: indirect buffer to fill with padding
 *
 * Pad the IB with NOPs to a multiple-of-8 dword boundary.
 */
static void sdma_v7_1_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	u32 pad_count;
	int i;

	pad_count = (-ib->length_dw) & 0x7;
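	/* e.g. length_dw % 8 == 3 -> pad_count == 5, filling up to the next 8-DW boundary */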
	for (i = 0; i < pad_count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP) |
				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
		else
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_NOP);
}

/**
 * sdma_v7_1_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (CIK).
 */
static void sdma_v7_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq); /* reference */
	amdgpu_ring_write(ring, 0xffffffff); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
}

/**
 * sdma_v7_1_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA.
 */
static void sdma_v7_1_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}

static void sdma_v7_1_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	/* The SRBM WRITE command is not supported on SDMA v7.x.
	 * Use the Register WRITE command instead, whose opcode is
	 * the same as SRBM WRITE's.
	 */
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_SRBM_WRITE));
	amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2);
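	/* the dword-based register offset above is shifted by 2, presumably
	 * because the packet expects a byte address
	 */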
	amdgpu_ring_write(ring, val);
}

static void sdma_v7_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
	amdgpu_ring_write(ring, soc_v1_0_normalize_xcc_reg_offset(reg) << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}

static void sdma_v7_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	amdgpu_ring_emit_wreg(ring, reg0, ref);
	/* wait for a cycle to reset vm_inv_eng*_ack */
	amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0);
	amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask);
}

static const struct amdgpu_vm_pte_funcs sdma_v7_1_vm_pte_funcs = {
	.copy_pte_num_dw = 8,
	.copy_pte = sdma_v7_1_vm_copy_pte,
	.write_pte = sdma_v7_1_vm_write_pte,
	.set_pte_pde = sdma_v7_1_vm_set_pte_pde,
};

static int sdma_v7_1_early_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int r;

	switch (amdgpu_user_queue) {
	case -1:
	default:
		adev->sdma.no_user_submission = true;
		adev->sdma.disable_uq = true;
		break;
	case 0:
		adev->sdma.no_user_submission = false;
		adev->sdma.disable_uq = true;
		break;
	}

	r = amdgpu_sdma_init_microcode(adev, 0, true);
	if (r) {
		DRM_ERROR("Failed to init sdma firmware!\n");
		return r;
	}

	sdma_v7_1_set_ring_funcs(adev);
	sdma_v7_1_set_buffer_funcs(adev);
	amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v7_1_vm_pte_funcs);
	sdma_v7_1_set_irq_funcs(adev);
	sdma_v7_1_set_mqd_funcs(adev);

	return 0;
}

static int sdma_v7_1_sw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
	uint32_t *ptr;
	u32 xcc_id;

	/* SDMA trap event */
	r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GFX,
			      GFX_12_1_0__SRCID__SDMA_TRAP,
			      &adev->sdma.trap_irq);
	if (r)
		return r;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->me = i;

		for (xcc_id = 0; xcc_id < fls(adev->gfx.xcc_mask); xcc_id++) {
			if (adev->sdma.instance[i].xcc_id == GET_INST(GC, xcc_id))
				break;
		}

		DRM_DEBUG("SDMA%d.%d use_doorbell being set to: [%s]\n",
			  xcc_id, GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc,
			  ring->use_doorbell ? "true" : "false");

		ring->doorbell_index =
			(adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset

		ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
		sprintf(ring->name, "sdma%d.%d", xcc_id,
			GET_INST(SDMA0, i) % adev->sdma.num_inst_per_xcc);
		r = amdgpu_ring_init(adev, ring, 1024,
				     &adev->sdma.trap_irq,
				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;
	}

	adev->sdma.supported_reset =
		amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring);
	if (!amdgpu_sriov_vf(adev) &&
	    !adev->debug_disable_gpu_ring_reset)
		adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;

	r = amdgpu_sdma_sysfs_reset_mask_init(adev);
	if (r)
		return r;

	/* Allocate memory for the SDMA IP dump buffer */
	ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL);
	if (ptr)
		adev->sdma.ip_dump = ptr;
	else
		DRM_ERROR("Failed to allocate memory for SDMA IP dump\n");

#ifdef CONFIG_DRM_AMDGPU_NAVI3X_USERQ
	adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
#endif

	return r;
}

static int sdma_v7_1_sw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++)
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);

	amdgpu_sdma_sysfs_reset_mask_fini(adev);
	amdgpu_sdma_destroy_inst_ctx(adev, true);

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)
		sdma_v7_1_inst_free_ucode_buffer(adev, adev->sdma.sdma_mask);

	kfree(adev->sdma.ip_dump);

	return 0;
}

static int sdma_v7_1_hw_init(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	uint32_t inst_mask;

	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);

	return sdma_v7_1_inst_start(adev, inst_mask);
}

static int sdma_v7_1_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	if (amdgpu_sriov_vf(adev))
		return 0;

	sdma_v7_1_inst_ctx_switch_enable(adev, false, adev->sdma.sdma_mask);
	sdma_v7_1_inst_enable(adev, false, adev->sdma.sdma_mask);

	return 0;
}

static int sdma_v7_1_suspend(struct amdgpu_ip_block *ip_block)
{
	return sdma_v7_1_hw_fini(ip_block);
}

static int sdma_v7_1_resume(struct amdgpu_ip_block *ip_block)
{
	return sdma_v7_1_hw_init(ip_block);
}

static bool sdma_v7_1_is_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;
	u32 i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		u32 tmp = RREG32(sdma_v7_1_get_reg_offset(adev, i, regSDMA0_SDMA_STATUS_REG));

		if (!(tmp & SDMA0_SDMA_STATUS_REG__IDLE_MASK))
			return false;
	}

	return true;
}

static int sdma_v7_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	unsigned i, j;
	u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
	struct amdgpu_device *adev = ip_block->adev;

	for (i = 0; i < adev->usec_timeout; i++) {
		for (j = 0; j < adev->sdma.num_instances; j++) {
			sdma[j] = RREG32(sdma_v7_1_get_reg_offset(adev,
					j, regSDMA0_SDMA_STATUS_REG));
			if (!(sdma[j] & SDMA0_SDMA_STATUS_REG__IDLE_MASK))
				break;
		}
		if (j == adev->sdma.num_instances)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}

static int sdma_v7_1_ring_preempt_ib(struct amdgpu_ring *ring)
{
	int i, r = 0;
	struct amdgpu_device *adev = ring->adev;
	u32 index = 0;
	u64 sdma_gfx_preempt;

	amdgpu_sdma_get_index_from_ring(ring, &index);
	sdma_gfx_preempt =
		sdma_v7_1_get_reg_offset(adev, index, regSDMA0_SDMA_QUEUE0_PREEMPT);

	/* assert preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, false);

	/* emit the trailing fence */
	ring->trail_seq += 1;
	r = amdgpu_ring_alloc(ring, 10);
	if (r) {
		DRM_ERROR("ring %d failed to be allocated\n", ring->idx);
		return r;
	}
	sdma_v7_1_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
				  ring->trail_seq, 0);
	amdgpu_ring_commit(ring);

	/* assert IB preemption */
	WREG32(sdma_gfx_preempt, 1);

	/* poll the trailing fence */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (ring->trail_seq ==
		    le32_to_cpu(*(ring->trail_fence_cpu_addr)))
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout) {
		r = -EINVAL;
		DRM_ERROR("ring %d failed to be preempted\n", ring->idx);
	}

	/* deassert IB preemption */
	WREG32(sdma_gfx_preempt, 0);

	/* deassert the preemption condition */
	amdgpu_ring_set_preempt_cond_exec(ring, true);
	return r;
}

static int sdma_v7_1_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	u32 reg_offset = sdma_v7_1_get_reg_offset(adev, type, regSDMA0_SDMA_CNTL);

	sdma_cntl = RREG32(reg_offset);
	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_SDMA_CNTL, TRAP_ENABLE,
				  state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
	WREG32(reg_offset, sdma_cntl);

	return 0;
}

static int sdma_v7_1_process_trap_irq(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	int inst, instances, queue, xcc_id = 0;

	DRM_DEBUG("IH: SDMA trap\n");

	if (drm_WARN_ON_ONCE(&adev->ddev,
			     adev->enable_mes &&
			     (entry->src_data[0] & AMDGPU_FENCE_MES_QUEUE_FLAG)))
		return 0;

	queue = entry->ring_id & 0xf;
	if (adev->gfx.funcs && adev->gfx.funcs->ih_node_to_logical_xcc)
		xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, entry->node_id);
	else
		dev_warn(adev->dev, "IH: SDMA may get a wrong xcc_id as the gfx function is not available\n");
	inst = ((entry->ring_id & 0xf0) >> 4) +
	       GET_INST(GC, xcc_id) * adev->sdma.num_inst_per_xcc;
	for (instances = 0; instances < adev->sdma.num_instances; instances++) {
		if (inst == GET_INST(SDMA0, instances))
			break;
	}
	if (instances > adev->sdma.num_instances - 1) {
		DRM_ERROR("IH: wrong ring_id detected, i.e. wrong sdma instance\n");
		return -EINVAL;
	}

	switch (entry->client_id) {
	case SOC_V1_0_IH_CLIENTID_GFX:
		switch (queue) {
		case 0:
			amdgpu_fence_process(&adev->sdma.instance[instances].ring);
			break;
		default:
			break;
		}
		break;
	}
	return 0;
}
1561
static int sdma_v7_1_process_illegal_inst_irq(struct amdgpu_device *adev,
                                              struct amdgpu_irq_src *source,
                                              struct amdgpu_iv_entry *entry)
{
        return 0;
}

static int sdma_v7_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
                                           enum amd_clockgating_state state)
{
        return 0;
}

static int sdma_v7_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
                                           enum amd_powergating_state state)
{
        return 0;
}

static void sdma_v7_1_get_clockgating_state(struct amdgpu_ip_block *ip_block,
                                            u64 *flags)
{
}

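/**
 * sdma_v7_1_print_ip_state - print the captured SDMA register state
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 * @p: drm printer the dump is written to
 *
 * Print the register snapshot taken by sdma_v7_1_dump_ip_state(),
 * one block of registers per SDMA instance.
 */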
static void sdma_v7_1_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p)
{
        struct amdgpu_device *adev = ip_block->adev;
        int i, j;
        uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);
        uint32_t instance_offset;

        if (!adev->sdma.ip_dump)
                return;

        drm_printf(p, "num_instances:%d\n", adev->sdma.num_instances);
        for (i = 0; i < adev->sdma.num_instances; i++) {
                instance_offset = i * reg_count;
                drm_printf(p, "\nInstance:%d\n", i);

                for (j = 0; j < reg_count; j++)
                        drm_printf(p, "%-50s \t 0x%08x\n", sdma_reg_list_7_1[j].reg_name,
                                   adev->sdma.ip_dump[instance_offset + j]);
        }
}

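/**
 * sdma_v7_1_dump_ip_state - capture the current SDMA register state
 *
 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
 *
 * Read every register in sdma_reg_list_7_1 for each SDMA instance into
 * the ip_dump buffer, with GFXOFF disabled around the reads so the
 * registers remain accessible.
 */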
static void sdma_v7_1_dump_ip_state(struct amdgpu_ip_block *ip_block)
{
        struct amdgpu_device *adev = ip_block->adev;
        int i, j;
        uint32_t instance_offset;
        uint32_t reg_count = ARRAY_SIZE(sdma_reg_list_7_1);

        if (!adev->sdma.ip_dump)
                return;

        amdgpu_gfx_off_ctrl(adev, false);
        for (i = 0; i < adev->sdma.num_instances; i++) {
                instance_offset = i * reg_count;
                for (j = 0; j < reg_count; j++)
                        adev->sdma.ip_dump[instance_offset + j] =
                                RREG32(sdma_v7_1_get_reg_offset(adev, i,
                                       sdma_reg_list_7_1[j].reg_offset));
        }
        amdgpu_gfx_off_ctrl(adev, true);
}

const struct amd_ip_funcs sdma_v7_1_ip_funcs = {
        .name = "sdma_v7_1",
        .early_init = sdma_v7_1_early_init,
        .late_init = NULL,
        .sw_init = sdma_v7_1_sw_init,
        .sw_fini = sdma_v7_1_sw_fini,
        .hw_init = sdma_v7_1_hw_init,
        .hw_fini = sdma_v7_1_hw_fini,
        .suspend = sdma_v7_1_suspend,
        .resume = sdma_v7_1_resume,
        .is_idle = sdma_v7_1_is_idle,
        .wait_for_idle = sdma_v7_1_wait_for_idle,
        .soft_reset = sdma_v7_1_soft_reset,
        .check_soft_reset = sdma_v7_1_check_soft_reset,
        .set_clockgating_state = sdma_v7_1_set_clockgating_state,
        .set_powergating_state = sdma_v7_1_set_powergating_state,
        .get_clockgating_state = sdma_v7_1_get_clockgating_state,
        .dump_ip_state = sdma_v7_1_dump_ip_state,
        .print_ip_state = sdma_v7_1_print_ip_state,
};

static const struct amdgpu_ring_funcs sdma_v7_1_ring_funcs = {
        .type = AMDGPU_RING_TYPE_SDMA,
        .align_mask = 0xf,
        .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
        .support_64bit_ptrs = true,
        .secure_submission_supported = true,
        .get_rptr = sdma_v7_1_ring_get_rptr,
        .get_wptr = sdma_v7_1_ring_get_wptr,
        .set_wptr = sdma_v7_1_ring_set_wptr,
        .emit_frame_size =
                5 + /* sdma_v7_1_ring_init_cond_exec */
                6 + /* sdma_v7_1_ring_emit_pipeline_sync */
                /* sdma_v7_1_ring_emit_vm_flush */
                SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
                SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
                10 + 10 + 10, /* sdma_v7_1_ring_emit_fence x3 for user fence, vm fence */
        .emit_ib_size = 5 + 7 + 6, /* sdma_v7_1_ring_emit_ib */
        .emit_ib = sdma_v7_1_ring_emit_ib,
        .emit_mem_sync = sdma_v7_1_ring_emit_mem_sync,
        .emit_fence = sdma_v7_1_ring_emit_fence,
        .emit_pipeline_sync = sdma_v7_1_ring_emit_pipeline_sync,
        .emit_vm_flush = sdma_v7_1_ring_emit_vm_flush,
        .test_ring = sdma_v7_1_ring_test_ring,
        .test_ib = sdma_v7_1_ring_test_ib,
        .insert_nop = sdma_v7_1_ring_insert_nop,
        .pad_ib = sdma_v7_1_ring_pad_ib,
        .emit_wreg = sdma_v7_1_ring_emit_wreg,
        .emit_reg_wait = sdma_v7_1_ring_emit_reg_wait,
        .emit_reg_write_reg_wait = sdma_v7_1_ring_emit_reg_write_reg_wait,
        .init_cond_exec = sdma_v7_1_ring_init_cond_exec,
        .preempt_ib = sdma_v7_1_ring_preempt_ib,
        .reset = sdma_v7_1_reset_queue,
};

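/**
 * sdma_v7_1_set_ring_funcs - set the ring function pointers
 *
 * @adev: amdgpu device pointer
 *
 * Hook up the ring functions for every SDMA instance and derive each
 * instance's XCC id from its physical instance number.
 */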
static void sdma_v7_1_set_ring_funcs(struct amdgpu_device *adev)
{
        int i, dev_inst;

        for (i = 0; i < adev->sdma.num_instances; i++) {
                adev->sdma.instance[i].ring.funcs = &sdma_v7_1_ring_funcs;
                adev->sdma.instance[i].ring.me = i;

                dev_inst = GET_INST(SDMA0, i);
                /* XCC to which SDMA belongs depends on physical instance */
                adev->sdma.instance[i].xcc_id =
                        dev_inst / adev->sdma.num_inst_per_xcc;
        }
}

static const struct amdgpu_irq_src_funcs sdma_v7_1_trap_irq_funcs = {
        .set = sdma_v7_1_set_trap_irq_state,
        .process = sdma_v7_1_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v7_1_illegal_inst_irq_funcs = {
        .process = sdma_v7_1_process_illegal_inst_irq,
};

static void sdma_v7_1_set_irq_funcs(struct amdgpu_device *adev)
{
        adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 +
                                        adev->sdma.num_instances;
        adev->sdma.trap_irq.funcs = &sdma_v7_1_trap_irq_funcs;
        adev->sdma.illegal_inst_irq.funcs = &sdma_v7_1_illegal_inst_irq_funcs;
}

/**
 * sdma_v7_1_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill with commands
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 * @copy_flags: copy flags for the buffers
 *
 * Copy GPU buffers using the DMA engine.
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v7_1_emit_copy_buffer(struct amdgpu_ib *ib,
                                       uint64_t src_offset,
                                       uint64_t dst_offset,
                                       uint32_t byte_count,
                                       uint32_t copy_flags)
{
        ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
                SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
                SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0);

        ib->ptr[ib->length_dw++] = byte_count - 1;
        ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
        ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
        ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
        ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
        ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}

/**
 * sdma_v7_1_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to fill
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine.
 */
static void sdma_v7_1_emit_fill_buffer(struct amdgpu_ib *ib,
                                       uint32_t src_data,
                                       uint64_t dst_offset,
                                       uint32_t byte_count)
{
        ib->ptr[ib->length_dw++] = SDMA_PKT_CONSTANT_FILL_HEADER_OP(SDMA_OP_CONST_FILL);
        ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
        ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
        ib->ptr[ib->length_dw++] = src_data;
        ib->ptr[ib->length_dw++] = byte_count - 1;
}

static const struct amdgpu_buffer_funcs sdma_v7_1_buffer_funcs = {
        .copy_max_bytes = 1 << 30,
        .copy_num_dw = 8,
        .emit_copy_buffer = sdma_v7_1_emit_copy_buffer,
        .fill_max_bytes = 1 << 30,
        .fill_num_dw = 5,
        .emit_fill_buffer = sdma_v7_1_emit_fill_buffer,
};

static void sdma_v7_1_set_buffer_funcs(struct amdgpu_device *adev)
{
        adev->mman.buffer_funcs = &sdma_v7_1_buffer_funcs;
        adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}

const struct amdgpu_ip_block_version sdma_v7_1_ip_block = {
        .type = AMD_IP_BLOCK_TYPE_SDMA,
        .major = 7,
        .minor = 1,
        .rev = 0,
        .funcs = &sdma_v7_1_ip_funcs,
};

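/**
 * sdma_v7_1_xcp_resume - resume the SDMA instances of a partition
 *
 * @handle: amdgpu device pointer
 * @inst_mask: mask of the SDMA instances to resume
 *
 * Start the SDMA engines selected by @inst_mask.
 */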
static int sdma_v7_1_xcp_resume(void *handle, uint32_t inst_mask)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        return sdma_v7_1_inst_start(adev, inst_mask);
}

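/**
 * sdma_v7_1_xcp_suspend - suspend the SDMA instances of a partition
 *
 * @handle: amdgpu device pointer
 * @inst_mask: mask of the SDMA instances to suspend
 *
 * Disable context switching and halt the SDMA engines selected by
 * @inst_mask.
 */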
static int sdma_v7_1_xcp_suspend(void *handle, uint32_t inst_mask)
{
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

        sdma_v7_1_inst_ctx_switch_enable(adev, false, inst_mask);
        sdma_v7_1_inst_enable(adev, false, inst_mask);

        return 0;
}

struct amdgpu_xcp_ip_funcs sdma_v7_1_xcp_funcs = {
        .suspend = &sdma_v7_1_xcp_suspend,
        .resume = &sdma_v7_1_xcp_resume
};