1 /*
2 * Copyright 2025 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23 #include <linux/delay.h>
24 #include <linux/kernel.h>
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include "amdgpu.h"
29 #include "amdgpu_gfx.h"
30 #include "amdgpu_psp.h"
31 #include "amdgpu_smu.h"
32 #include "amdgpu_atomfirmware.h"
33 #include "amdgpu_userq_fence.h"
34 #include "imu_v12_1.h"
35 #include "soc_v1_0.h"
36 #include "gfx_v12_1_pkt.h"
37
38 #include "gc/gc_12_1_0_offset.h"
39 #include "gc/gc_12_1_0_sh_mask.h"
40 #include "soc24_enum.h"
41 #include "ivsrcid/gfx/irqsrcs_gfx_12_1_0.h"
42
43 #include "soc15.h"
44 #include "clearstate_gfx12.h"
45 #include "v12_structs.h"
46 #include "gfx_v12_1.h"
47 #include "mes_v12_1.h"
48 #include "amdgpu_ras_mgr.h"
49
50 #define GFX12_MEC_HPD_SIZE 2048
51 #define NUM_SIMD_PER_CU_GFX12_1 4
52
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54
55 #define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000000
56 #define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000
57 #define regCP_MQD_CONTROL_DEFAULT 0x00000100
58 #define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509
59 #define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000
60 #define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0ae06301
61 #define regCP_HQD_IB_CONTROL_DEFAULT 0x00100000
62
63 MODULE_FIRMWARE("amdgpu/gc_12_1_0_mec.bin");
64 MODULE_FIRMWARE("amdgpu/gc_12_1_0_rlc.bin");
65
66 #define SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0 0x00000001
67 #define DEFAULT_SH_MEM_CONFIG \
68 ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \
69 (SH_MEM_ALIGNMENT_MODE_UNALIGNED_GFX12_1_0 << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \
70 (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT))
71
72 static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id);
73 static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev);
74 static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev);
75 static void gfx_v12_1_set_rlc_funcs(struct amdgpu_device *adev);
76 static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev);
77 static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev);
78 static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
79 struct amdgpu_cu_info *cu_info);
80 static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev);
81 static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
82 u32 sh_num, u32 instance, int xcc_id);
83 static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
84 uint32_t val);
85 static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
86 static void gfx_v12_1_ring_invalidate_tlbs(struct amdgpu_ring *ring,
87 uint16_t pasid, uint32_t flush_type,
88 bool all_hub, uint8_t dst_sel);
89 static void gfx_v12_1_xcc_set_safe_mode(struct amdgpu_device *adev, int xcc_id);
90 static void gfx_v12_1_xcc_unset_safe_mode(struct amdgpu_device *adev, int xcc_id);
91 static void gfx_v12_1_update_perf_clk(struct amdgpu_device *adev,
92 bool enable);
93 static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev,
94 bool enable, int xcc_id);
95 static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev);
96
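/*
 * Build a PACKET3_SET_RESOURCES packet on the KIQ ring, handing the given
 * compute queue mask (and no GWS/OAC resources) to the kernel interface queue.
 */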
97 static void gfx_v12_1_kiq_set_resources(struct amdgpu_ring *kiq_ring,
98 uint64_t queue_mask)
99 {
100 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
101 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
102 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
103 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
104 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
105 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
106 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
107 amdgpu_ring_write(kiq_ring, 0); /* oac mask */
108 amdgpu_ring_write(kiq_ring, 0);
109 }
110
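/*
 * Emit a PACKET3_MAP_QUEUES packet that maps @ring's MQD and write pointer
 * onto the hardware via the KIQ; compute rings target ME1, MES rings ME2
 * with engine select 5.
 */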
111 static void gfx_v12_1_kiq_map_queues(struct amdgpu_ring *kiq_ring,
112 struct amdgpu_ring *ring)
113 {
114 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
115 uint64_t wptr_addr = ring->wptr_gpu_addr;
116 uint32_t me = 0, eng_sel = 0;
117
118 switch (ring->funcs->type) {
119 case AMDGPU_RING_TYPE_COMPUTE:
120 me = 1;
121 eng_sel = 0;
122 break;
123 case AMDGPU_RING_TYPE_MES:
124 me = 2;
125 eng_sel = 5;
126 break;
127 default:
128 WARN_ON(1);
129 }
130
131 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
132 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
133 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
134 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
135 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
136 PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
137 PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
138 PACKET3_MAP_QUEUES_ME((me)) |
139 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
140 PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
141 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
142 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
143 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
144 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
145 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
146 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
147 }
148
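/*
 * Emit a PACKET3_UNMAP_QUEUES packet for @ring, or defer to MES when MES is
 * enabled and the KIQ ring is not ready. For PREEMPT_QUEUES_NO_UNMAP the
 * trailing dwords carry the fence address and sequence value to signal.
 */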
149 static void gfx_v12_1_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
150 struct amdgpu_ring *ring,
151 enum amdgpu_unmap_queues_action action,
152 u64 gpu_addr, u64 seq)
153 {
154 struct amdgpu_device *adev = kiq_ring->adev;
155 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
156
157 if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
158 amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr,
159 seq, kiq_ring->xcc_id);
160 return;
161 }
162
163 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
164 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
165 PACKET3_UNMAP_QUEUES_ACTION(action) |
166 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
167 PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
168 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
169 amdgpu_ring_write(kiq_ring,
170 PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
171
172 if (action == PREEMPT_QUEUES_NO_UNMAP) {
173 amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
174 amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
175 amdgpu_ring_write(kiq_ring, seq);
176 } else {
177 amdgpu_ring_write(kiq_ring, 0);
178 amdgpu_ring_write(kiq_ring, 0);
179 amdgpu_ring_write(kiq_ring, 0);
180 }
181 }
182
183 static void gfx_v12_1_kiq_query_status(struct amdgpu_ring *kiq_ring,
184 struct amdgpu_ring *ring,
185 u64 addr, u64 seq)
186 {
187 uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
188
189 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
190 amdgpu_ring_write(kiq_ring,
191 PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
192 PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
193 PACKET3_QUERY_STATUS_COMMAND(2));
194 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
195 PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
196 PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
197 amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
198 amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
199 amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
200 amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
201 }
202
203 static void gfx_v12_1_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
204 uint16_t pasid,
205 uint32_t flush_type,
206 bool all_hub)
207 {
208 gfx_v12_1_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1);
209 }
210
211 static const struct kiq_pm4_funcs gfx_v12_1_kiq_pm4_funcs = {
212 .kiq_set_resources = gfx_v12_1_kiq_set_resources,
213 .kiq_map_queues = gfx_v12_1_kiq_map_queues,
214 .kiq_unmap_queues = gfx_v12_1_kiq_unmap_queues,
215 .kiq_query_status = gfx_v12_1_kiq_query_status,
216 .kiq_invalidate_tlbs = gfx_v12_1_kiq_invalidate_tlbs,
217 .set_resources_size = 8,
218 .map_queues_size = 7,
219 .unmap_queues_size = 6,
220 .query_status_size = 7,
221 .invalidate_tlbs_size = 2,
222 };
223
224 static void gfx_v12_1_set_kiq_pm4_funcs(struct amdgpu_device *adev)
225 {
226 int i, num_xcc;
227
228 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
229 for (i = 0; i < num_xcc; i++)
230 adev->gfx.kiq[i].pmf = &gfx_v12_1_kiq_pm4_funcs;
231 }
232
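/*
 * Emit a PACKET3_WAIT_REG_MEM that polls either a register pair or a memory
 * location until (value & mask) == ref; register offsets are normalized to
 * the local XCC aperture first.
 */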
233 static void gfx_v12_1_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
234 int mem_space, int opt, uint32_t addr0,
235 uint32_t addr1, uint32_t ref,
236 uint32_t mask, uint32_t inv)
237 {
238 if (mem_space == 0) {
239 addr0 = soc_v1_0_normalize_xcc_reg_offset(addr0);
240 addr1 = soc_v1_0_normalize_xcc_reg_offset(addr1);
241 }
242
243 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
244 amdgpu_ring_write(ring,
245 /* memory (1) or register (0) */
246 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
247 WAIT_REG_MEM_OPERATION(opt) | /* wait */
248 WAIT_REG_MEM_FUNCTION(3))); /* equal */
249
250 if (mem_space)
251 BUG_ON(addr0 & 0x3); /* Dword align */
252 amdgpu_ring_write(ring, addr0);
253 amdgpu_ring_write(ring, addr1);
254 amdgpu_ring_write(ring, ref);
255 amdgpu_ring_write(ring, mask);
256 amdgpu_ring_write(ring, inv); /* poll interval */
257 }
258
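/*
 * Basic ring test: seed SCRATCH_REG0 with 0xCAFEDEAD, submit a packet that
 * writes 0xDEADBEEF to it, then poll the register until the value flips or
 * the usec timeout expires.
 */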
259 static int gfx_v12_1_ring_test_ring(struct amdgpu_ring *ring)
260 {
261 struct amdgpu_device *adev = ring->adev;
262 uint32_t scratch_reg0_offset, xcc_offset;
263 uint32_t tmp = 0;
264 unsigned i;
265 int r;
266
267 /* Use register offset which is local to XCC in the packet */
268 xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0);
269 scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0);
270 WREG32(scratch_reg0_offset, 0xCAFEDEAD);
271 tmp = RREG32(scratch_reg0_offset);
272
273 r = amdgpu_ring_alloc(ring, 5);
274 if (r) {
275 dev_err(adev->dev,
276 "amdgpu: cp failed to lock ring %d (%d).\n",
277 ring->idx, r);
278 return r;
279 }
280
281 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) {
282 gfx_v12_1_ring_emit_wreg(ring, xcc_offset, 0xDEADBEEF);
283 } else {
284 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
285 amdgpu_ring_write(ring, xcc_offset -
286 PACKET3_SET_UCONFIG_REG_START);
287 amdgpu_ring_write(ring, 0xDEADBEEF);
288 }
289 amdgpu_ring_commit(ring);
290
291 for (i = 0; i < adev->usec_timeout; i++) {
292 tmp = RREG32(scratch_reg0_offset);
293 if (tmp == 0xDEADBEEF)
294 break;
295 if (amdgpu_emu_mode == 1)
296 msleep(1);
297 else
298 udelay(1);
299 }
300
301 if (i >= adev->usec_timeout)
302 r = -ETIMEDOUT;
303 return r;
304 }
305
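/*
 * IB test: submit a small indirect buffer whose WRITE_DATA packet stores
 * 0xDEADBEEF into a writeback slot, wait for the fence, and check that the
 * value reached memory.
 */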
306 static int gfx_v12_1_ring_test_ib(struct amdgpu_ring *ring, long timeout)
307 {
308 struct amdgpu_device *adev = ring->adev;
309 struct amdgpu_ib ib;
310 struct dma_fence *f = NULL;
311 unsigned index;
312 uint64_t gpu_addr;
313 volatile uint32_t *cpu_ptr;
314 long r;
315
316 /* MES KIQ fw doesn't support indirect buffers for now */
317 if (adev->enable_mes_kiq &&
318 ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
319 return 0;
320
321 memset(&ib, 0, sizeof(ib));
322
323 r = amdgpu_device_wb_get(adev, &index);
324 if (r)
325 return r;
326
327 gpu_addr = adev->wb.gpu_addr + (index * 4);
328 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
329 cpu_ptr = &adev->wb.wb[index];
330
331 r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib);
332 if (r) {
333 dev_err(adev->dev, "amdgpu: failed to get ib (%ld).\n", r);
334 goto err1;
335 }
336
337 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
338 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
339 ib.ptr[2] = lower_32_bits(gpu_addr);
340 ib.ptr[3] = upper_32_bits(gpu_addr);
341 ib.ptr[4] = 0xDEADBEEF;
342 ib.length_dw = 5;
343
344 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
345 if (r)
346 goto err2;
347
348 r = dma_fence_wait_timeout(f, false, timeout);
349 if (r == 0) {
350 r = -ETIMEDOUT;
351 goto err2;
352 } else if (r < 0) {
353 goto err2;
354 }
355
356 if (le32_to_cpu(*cpu_ptr) == 0xDEADBEEF)
357 r = 0;
358 else
359 r = -EINVAL;
360 err2:
361 amdgpu_ib_free(&ib, NULL);
362 dma_fence_put(f);
363 err1:
364 amdgpu_device_wb_free(adev, index);
365 return r;
366 }
367
368 static void gfx_v12_1_free_microcode(struct amdgpu_device *adev)
369 {
370 amdgpu_ucode_release(&adev->gfx.rlc_fw);
371 amdgpu_ucode_release(&adev->gfx.mec_fw);
372
373 kfree(adev->gfx.rlc.register_list_format);
374 }
375
376 static int gfx_v12_1_init_toc_microcode(struct amdgpu_device *adev, const char *ucode_prefix)
377 {
378 const struct psp_firmware_header_v1_0 *toc_hdr;
379 int err = 0;
380
381 err = amdgpu_ucode_request(adev, &adev->psp.toc_fw,
382 AMDGPU_UCODE_REQUIRED,
383 "amdgpu/%s_toc.bin", ucode_prefix);
384 if (err)
385 goto out;
386
387 toc_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.toc_fw->data;
388 adev->psp.toc.fw_version = le32_to_cpu(toc_hdr->header.ucode_version);
389 adev->psp.toc.feature_version = le32_to_cpu(toc_hdr->sos.fw_version);
390 adev->psp.toc.size_bytes = le32_to_cpu(toc_hdr->header.ucode_size_bytes);
391 adev->psp.toc.start_addr = (uint8_t *)toc_hdr +
392 le32_to_cpu(toc_hdr->header.ucode_array_offset_bytes);
393 return 0;
394 out:
395 amdgpu_ucode_release(&adev->psp.toc_fw);
396 return err;
397 }
398
399 static int gfx_v12_1_init_microcode(struct amdgpu_device *adev)
400 {
401 char ucode_prefix[15];
402 int err;
403 const struct rlc_firmware_header_v2_0 *rlc_hdr;
404 uint16_t version_major;
405 uint16_t version_minor;
406
407 DRM_DEBUG("\n");
408
409 amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
410
411 if (!amdgpu_sriov_vf(adev)) {
412 err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
413 AMDGPU_UCODE_REQUIRED,
414 "amdgpu/%s_rlc.bin", ucode_prefix);
415 if (err)
416 goto out;
417 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
418 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
419 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
420 err = amdgpu_gfx_rlc_init_microcode(adev, version_major, version_minor);
421 if (err)
422 goto out;
423 }
424
425 err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
426 AMDGPU_UCODE_REQUIRED,
427 "amdgpu/%s_mec.bin", ucode_prefix);
428 if (err)
429 goto out;
430 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC);
431 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK);
432 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK);
433 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK);
434 amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK);
435
436 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
437 err = gfx_v12_1_init_toc_microcode(adev, ucode_prefix);
438
439 /* only one MEC for gfx 12 */
440 adev->gfx.mec2_fw = NULL;
441
442 if (adev->gfx.imu.funcs) {
443 if (adev->gfx.imu.funcs->init_microcode) {
444 err = adev->gfx.imu.funcs->init_microcode(adev);
445 if (err)
446 dev_err(adev->dev, "Failed to load imu firmware!\n");
447 }
448 }
449
450 out:
451 if (err) {
452 amdgpu_ucode_release(&adev->gfx.rlc_fw);
453 amdgpu_ucode_release(&adev->gfx.mec_fw);
454 }
455
456 return err;
457 }
458
459 static u32 gfx_v12_1_get_csb_size(struct amdgpu_device *adev)
460 {
461 u32 count = 0;
462 const struct cs_section_def *sect = NULL;
463 const struct cs_extent_def *ext = NULL;
464
465 count += 1;
466
467 for (sect = gfx12_cs_data; sect->section != NULL; ++sect) {
468 if (sect->id == SECT_CONTEXT) {
469 for (ext = sect->section; ext->extent != NULL; ++ext)
470 count += 2 + ext->reg_count;
471 } else
472 return 0;
473 }
474
475 return count;
476 }
477
478 static void gfx_v12_1_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
479 {
480 u32 count = 0, clustercount = 0, i;
481 const struct cs_section_def *sect = NULL;
482 const struct cs_extent_def *ext = NULL;
483
484 if (adev->gfx.rlc.cs_data == NULL)
485 return;
486 if (buffer == NULL)
487 return;
488
489 count += 1;
490
491 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
492 if (sect->id == SECT_CONTEXT) {
493 for (ext = sect->section; ext->extent != NULL; ++ext) {
494 clustercount++;
495 buffer[count++] = ext->reg_count;
496 buffer[count++] = ext->reg_index;
497
498 for (i = 0; i < ext->reg_count; i++)
499 buffer[count++] = cpu_to_le32(ext->extent[i]);
500 }
501 } else
502 return;
503 }
504
505 buffer[0] = clustercount;
506 }
507
508 static void gfx_v12_1_rlc_fini(struct amdgpu_device *adev)
509 {
510 /* clear state block */
511 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
512 &adev->gfx.rlc.clear_state_gpu_addr,
513 (void **)&adev->gfx.rlc.cs_ptr);
514
515 /* jump table block */
516 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
517 &adev->gfx.rlc.cp_table_gpu_addr,
518 (void **)&adev->gfx.rlc.cp_table_ptr);
519 }
520
521 static void gfx_v12_1_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
522 {
523 int xcc_id, num_xcc;
524 struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
525
526 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
527 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
528 reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[GET_INST(GC, xcc_id)];
529
530 reg_access_ctrl->grbm_cntl =
531 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_CNTL);
532 reg_access_ctrl->grbm_idx =
533 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX);
534
535 reg_access_ctrl->vfi_cmd =
536 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_CMD);
537 reg_access_ctrl->vfi_stat =
538 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_STAT);
539 reg_access_ctrl->vfi_addr =
540 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_ADDR);
541 reg_access_ctrl->vfi_data =
542 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_DATA);
543 reg_access_ctrl->vfi_grbm_cntl =
544 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_GRBM_GFX_CNTL);
545 reg_access_ctrl->vfi_grbm_idx =
546 SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_VFI_GRBM_GFX_INDEX);
547 reg_access_ctrl->vfi_grbm_cntl_data = 0;
548 reg_access_ctrl->vfi_grbm_idx_data = 0;
549 }
550 adev->gfx.rlc.rlcg_reg_access_supported = true;
551 }
552
553 static int gfx_v12_1_rlc_init(struct amdgpu_device *adev)
554 {
555 const struct cs_section_def *cs_data;
556 int r, i, num_xcc;
557
558 adev->gfx.rlc.cs_data = gfx12_cs_data;
559
560 cs_data = adev->gfx.rlc.cs_data;
561
562 if (cs_data) {
563 /* init clear state block */
564 r = amdgpu_gfx_rlc_init_csb(adev);
565 if (r)
566 return r;
567 }
568
569 /* init spm vmid with 0xf */
570 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
571 for (i = 0; i < num_xcc; i++) {
572 if (adev->gfx.rlc.funcs->update_spm_vmid)
573 adev->gfx.rlc.funcs->update_spm_vmid(adev, i, NULL, 0xf);
574 }
575
576 return 0;
577 }
578
579 static void gfx_v12_1_mec_fini(struct amdgpu_device *adev)
580 {
581 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
582 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
583 amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_data_obj, NULL, NULL);
584 }
585
586 static int gfx_v12_1_mec_init(struct amdgpu_device *adev)
587 {
588 int r, i, num_xcc;
589 u32 *hpd;
590 size_t mec_hpd_size;
591
592 bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
593
594 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
595 for (i = 0; i < num_xcc; i++)
596 bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap,
597 AMDGPU_MAX_COMPUTE_QUEUES);
598
599 /* take ownership of the relevant compute queues */
600 amdgpu_gfx_compute_queue_acquire(adev);
601 mec_hpd_size = adev->gfx.num_compute_rings *
602 GFX12_MEC_HPD_SIZE * num_xcc;
603
604 if (mec_hpd_size) {
605 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
606 AMDGPU_GEM_DOMAIN_GTT,
607 &adev->gfx.mec.hpd_eop_obj,
608 &adev->gfx.mec.hpd_eop_gpu_addr,
609 (void **)&hpd);
610 if (r) {
611 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
612 gfx_v12_1_mec_fini(adev);
613 return r;
614 }
615
616 memset(hpd, 0, mec_hpd_size);
617
618 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
619 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
620 }
621
622 return 0;
623 }
624
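/* Read one wave-indexed SQ register through SQ_IND_INDEX/SQ_IND_DATA. */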
625 static uint32_t wave_read_ind(struct amdgpu_device *adev,
626 uint32_t xcc_id, uint32_t wave,
627 uint32_t address)
628 {
629 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
630 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
631 (address << SQ_IND_INDEX__INDEX__SHIFT));
632 return RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
633 }
634
635 static void wave_read_regs(struct amdgpu_device *adev,
636 uint32_t xcc_id, uint32_t wave,
637 uint32_t thread, uint32_t regno,
638 uint32_t num, uint32_t *out)
639 {
640 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_INDEX,
641 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
642 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
643 (thread << SQ_IND_INDEX__WORKITEM_ID__SHIFT) |
644 (SQ_IND_INDEX__AUTO_INCR_MASK));
645 while (num--)
646 *(out++) = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_IND_DATA);
647 }
648
649 static void gfx_v12_1_read_wave_data(struct amdgpu_device *adev,
650 uint32_t xcc_id,
651 uint32_t simd, uint32_t wave,
652 uint32_t *dst, int *no_fields)
653 {
654 /* in gfx12 the SIMD_ID is specified as part of the INSTANCE
655 * field when performing a select_se_sh so it should be
656 * zero here */
657 WARN_ON(simd != 0);
658
659 /* type 4 wave data */
660 dst[(*no_fields)++] = 4;
661 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_STATUS);
662 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_PC_LO);
663 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_PC_HI);
664 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXEC_LO);
665 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXEC_HI);
666 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_HW_ID1);
667 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_HW_ID2);
668 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_GPR_ALLOC);
669 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_LDS_ALLOC);
670 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_STS);
671 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_STS2);
672 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_IB_DBG1);
673 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_M0);
674 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_MODE);
675 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_STATE_PRIV);
676 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXCP_FLAG_PRIV);
677 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_EXCP_FLAG_USER);
678 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_TRAP_CTRL);
679 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_ACTIVE);
680 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_VALID_AND_IDLE);
681 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_DVGPR_ALLOC_LO);
682 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_DVGPR_ALLOC_HI);
683 dst[(*no_fields)++] = wave_read_ind(adev, xcc_id, wave, ixSQ_WAVE_SCHED_MODE);
684 }
685
686 static void gfx_v12_1_read_wave_sgprs(struct amdgpu_device *adev,
687 uint32_t xcc_id, uint32_t simd,
688 uint32_t wave, uint32_t start,
689 uint32_t size, uint32_t *dst)
690 {
691 WARN_ON(simd != 0);
692
693 wave_read_regs(adev, xcc_id, wave, 0,
694 start + SQIND_WAVE_SGPRS_OFFSET,
695 size, dst);
696 }
697
698 static void gfx_v12_1_read_wave_vgprs(struct amdgpu_device *adev,
699 uint32_t xcc_id, uint32_t simd,
700 uint32_t wave, uint32_t thread,
701 uint32_t start, uint32_t size,
702 uint32_t *dst)
703 {
704 wave_read_regs(adev, xcc_id, wave, thread,
705 start + SQIND_WAVE_VGPRS_OFFSET,
706 size, dst);
707 }
708
709 static void gfx_v12_1_select_me_pipe_q(struct amdgpu_device *adev,
710 u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
711 {
712 soc_v1_0_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
713 }
714
715 static int gfx_v12_1_get_xccs_per_xcp(struct amdgpu_device *adev)
716 {
717 /* Fill this in when the interface is ready */
718 return 1;
719 }
720
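/*
 * Translate an IH node id into the logical XCC index that owns it, by
 * deriving the physical XCC from the node id and matching it against the
 * GC instance map.
 */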
721 static int gfx_v12_1_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node)
722 {
723 int logic_xcc;
724 int xcc = (ih_node & 0x7) - 2 + (ih_node >> 3) * 4;
725
726 for (logic_xcc = 0; logic_xcc < NUM_XCC(adev->gfx.xcc_mask); logic_xcc++) {
727 if (xcc == GET_INST(GC, logic_xcc))
728 return logic_xcc;
729 }
730
731 dev_err(adev->dev, "Couldn't find xcc mapping from IH node\n");
732 return -EINVAL;
733 }
734
735 static const struct amdgpu_gfx_funcs gfx_v12_1_gfx_funcs = {
736 .get_gpu_clock_counter = &gfx_v12_1_get_gpu_clock_counter,
737 .select_se_sh = &gfx_v12_1_xcc_select_se_sh,
738 .read_wave_data = &gfx_v12_1_read_wave_data,
739 .read_wave_sgprs = &gfx_v12_1_read_wave_sgprs,
740 .read_wave_vgprs = &gfx_v12_1_read_wave_vgprs,
741 .select_me_pipe_q = &gfx_v12_1_select_me_pipe_q,
742 .update_perfmon_mgcg = &gfx_v12_1_update_perf_clk,
743 .get_xccs_per_xcp = &gfx_v12_1_get_xccs_per_xcp,
744 .ih_node_to_logical_xcc = &gfx_v12_1_ih_to_xcc_inst,
745 };
746
747 static int gfx_v12_1_gpu_early_init(struct amdgpu_device *adev)
748 {
749 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
750 case IP_VERSION(12, 1, 0):
751 adev->gfx.config.max_hw_contexts = 8;
752 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
753 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
754 adev->gfx.config.sc_hiz_tile_fifo_size = 0;
755 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
756 break;
757 default:
758 BUG();
759 break;
760 }
761
762 return 0;
763 }
764
765 static int gfx_v12_1_compute_ring_init(struct amdgpu_device *adev, int ring_id,
766 int xcc_id, int mec, int pipe, int queue)
767 {
768 int r;
769 unsigned irq_type;
770 struct amdgpu_ring *ring;
771 unsigned int hw_prio;
772 uint32_t xcc_doorbell_start;
773
774 ring = &adev->gfx.compute_ring[xcc_id * adev->gfx.num_compute_rings +
775 ring_id];
776
777 /* mec0 is me1 */
778 ring->xcc_id = xcc_id;
779 ring->me = mec + 1;
780 ring->pipe = pipe;
781 ring->queue = queue;
782
783 ring->ring_obj = NULL;
784 ring->use_doorbell = true;
785 xcc_doorbell_start = adev->doorbell_index.mec_ring0 +
786 xcc_id * adev->doorbell_index.xcc_doorbell_range;
787 ring->doorbell_index = (xcc_doorbell_start + ring_id) << 1;
788 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
789 (ring_id + xcc_id * adev->gfx.num_compute_rings) *
790 GFX12_MEC_HPD_SIZE;
791 ring->vm_hub = AMDGPU_GFXHUB(xcc_id);
792 sprintf(ring->name, "comp_%d.%d.%d.%d",
793 ring->xcc_id, ring->me, ring->pipe, ring->queue);
794
795 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
796 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
797 + ring->pipe;
798 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
799 AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
800 /* type-2 packets are deprecated on MEC, use type-3 instead */
801 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
802 hw_prio, NULL);
803 if (r)
804 return r;
805
806 return 0;
807 }
808
809 static struct {
810 SOC24_FIRMWARE_ID id;
811 unsigned int offset;
812 unsigned int size;
813 unsigned int size_x16;
814 unsigned int num_inst;
815 } rlc_autoload_info[SOC24_FIRMWARE_ID_MAX];
816
817 #define RLC_TOC_OFFSET_DWUNIT 8
818 #define RLC_SIZE_MULTIPLE 1024
819 #define RLC_TOC_UMF_SIZE_inM 23ULL
820 #define RLC_TOC_FORMAT_API 165ULL
821
822 #define RLC_NUM_INS_CODE0 1
823 #define RLC_NUM_INS_CODE1 8
824 #define RLC_NUM_INS_CODE2 2
825 #define RLC_NUM_INS_CODE3 16
826
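/*
 * Walk the RLC table-of-contents blob and cache each firmware entry's
 * offset, size and instance count in rlc_autoload_info[].
 */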
827 static void gfx_v12_1_parse_rlc_toc(struct amdgpu_device *adev, void *rlc_toc)
828 {
829 RLC_TABLE_OF_CONTENT_V2 *ucode = rlc_toc;
830
831 while (ucode && (ucode->id > SOC24_FIRMWARE_ID_INVALID)) {
832 rlc_autoload_info[ucode->id].id = ucode->id;
833 rlc_autoload_info[ucode->id].offset =
834 ucode->offset * RLC_TOC_OFFSET_DWUNIT * 4;
835 rlc_autoload_info[ucode->id].size =
836 ucode->size_x16 ? ucode->size * RLC_SIZE_MULTIPLE * 4 :
837 ucode->size * 4;
838 switch (ucode->vfflr_image_code) {
839 case 0:
840 rlc_autoload_info[ucode->id].num_inst =
841 RLC_NUM_INS_CODE0;
842 break;
843 case 1:
844 rlc_autoload_info[ucode->id].num_inst =
845 RLC_NUM_INS_CODE1;
846 break;
847 case 2:
848 rlc_autoload_info[ucode->id].num_inst =
849 RLC_NUM_INS_CODE2;
850 break;
851 case 3:
852 rlc_autoload_info[ucode->id].num_inst =
853 RLC_NUM_INS_CODE3;
854 break;
855 default:
856 dev_err(adev->dev,
857 "Invalid Instance number detected\n");
858 break;
859 }
860 ucode++;
861 }
862 }
863
864 static uint32_t gfx_v12_1_calc_toc_total_size(struct amdgpu_device *adev)
865 {
866 uint32_t total_size = 0;
867 SOC24_FIRMWARE_ID id;
868
869 gfx_v12_1_parse_rlc_toc(adev, adev->psp.toc.start_addr);
870
871 for (id = SOC24_FIRMWARE_ID_RLC_G_UCODE; id < SOC24_FIRMWARE_ID_MAX; id++)
872 total_size += rlc_autoload_info[id].size;
873
874 /* In case the offset in rlc toc ucode is aligned */
875 if (total_size < rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset)
876 total_size = rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].offset +
877 rlc_autoload_info[SOC24_FIRMWARE_ID_MAX-1].size;
878 if (total_size < (RLC_TOC_UMF_SIZE_inM << 20))
879 total_size = RLC_TOC_UMF_SIZE_inM << 20;
880
881 return total_size;
882 }
883
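/*
 * Allocate the VRAM buffer that backs the RLC backdoor-autoload image,
 * sized from the TOC and clamped to at least RLC_TOC_UMF_SIZE_inM MB.
 */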
884 static int gfx_v12_1_rlc_autoload_buffer_init(struct amdgpu_device *adev)
885 {
886 int r;
887 uint32_t total_size;
888
889 total_size = gfx_v12_1_calc_toc_total_size(adev);
890
891 r = amdgpu_bo_create_reserved(adev, total_size, 64 * 1024,
892 AMDGPU_GEM_DOMAIN_VRAM,
893 &adev->gfx.rlc.rlc_autoload_bo,
894 &adev->gfx.rlc.rlc_autoload_gpu_addr,
895 (void **)&adev->gfx.rlc.rlc_autoload_ptr);
896
897 if (r) {
898 dev_err(adev->dev, "(%d) failed to create fw autoload bo\n", r);
899 return r;
900 }
901
902 return 0;
903 }
904
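/*
 * Copy one firmware image into its TOC slot inside the autoload buffer,
 * replicating it per enabled instance and zero-padding each copy up to the
 * per-instance TOC entry size.
 */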
905 static void gfx_v12_1_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *adev,
906 SOC24_FIRMWARE_ID id,
907 const void *fw_data,
908 uint32_t fw_size)
909 {
910 uint32_t toc_offset;
911 uint32_t toc_fw_size, toc_fw_inst_size;
912 char *ptr = adev->gfx.rlc.rlc_autoload_ptr;
913 int i, num_inst;
914
915 if (id <= SOC24_FIRMWARE_ID_INVALID || id >= SOC24_FIRMWARE_ID_MAX)
916 return;
917
918 toc_offset = rlc_autoload_info[id].offset;
919 toc_fw_size = rlc_autoload_info[id].size;
920 num_inst = rlc_autoload_info[id].num_inst;
921 toc_fw_inst_size = toc_fw_size / num_inst;
922
923 if (fw_size == 0)
924 fw_size = toc_fw_inst_size;
925
926 if (fw_size > toc_fw_inst_size)
927 fw_size = toc_fw_inst_size;
928
929 for (i = 0; i < num_inst; i++) {
930 if ((num_inst == RLC_NUM_INS_CODE0) ||
931 ((1 << (i / 2)) & adev->gfx.xcc_mask)) {
932 memcpy(ptr + toc_offset + i * toc_fw_inst_size, fw_data, fw_size);
933
934 if (fw_size < toc_fw_inst_size)
935 memset(ptr + toc_offset + fw_size + i * toc_fw_inst_size,
936 0, toc_fw_inst_size - fw_size);
937 }
938 }
939 }
940
941 static void
942 gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev)
943 {
944 void *data;
945 uint32_t size;
946 uint32_t *toc_ptr;
947
948 data = adev->psp.toc.start_addr;
949 size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_TOC].size;
950
951 toc_ptr = (uint32_t *)data + size / 4 - 2;
952 *toc_ptr = (RLC_TOC_FORMAT_API << 24) | 0x1;
953
954 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_TOC,
955 data, size);
956 }
957
958 static void
959 gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode(struct amdgpu_device *adev)
960 {
961 const __le32 *fw_data;
962 uint32_t fw_size;
963 const struct gfx_firmware_header_v2_0 *cpv2_hdr;
964 const struct rlc_firmware_header_v2_0 *rlc_hdr;
965 const struct rlc_firmware_header_v2_1 *rlcv21_hdr;
966 const struct rlc_firmware_header_v2_2 *rlcv22_hdr;
967 uint16_t version_major, version_minor;
968
969 /* mec ucode */
970 cpv2_hdr = (const struct gfx_firmware_header_v2_0 *)
971 adev->gfx.mec_fw->data;
972 /* instruction */
973 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
974 le32_to_cpu(cpv2_hdr->ucode_offset_bytes));
975 fw_size = le32_to_cpu(cpv2_hdr->ucode_size_bytes);
976 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC,
977 fw_data, fw_size);
978 /* data */
979 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
980 le32_to_cpu(cpv2_hdr->data_offset_bytes));
981 fw_size = le32_to_cpu(cpv2_hdr->data_size_bytes);
982 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK,
983 fw_data, fw_size);
984 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK,
985 fw_data, fw_size);
986 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK,
987 fw_data, fw_size);
988 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK,
989 fw_data, fw_size);
990
991 /* rlc ucode */
992 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)
993 adev->gfx.rlc_fw->data;
994 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
995 le32_to_cpu(rlc_hdr->header.ucode_array_offset_bytes));
996 fw_size = le32_to_cpu(rlc_hdr->header.ucode_size_bytes);
997 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_G_UCODE,
998 fw_data, fw_size);
999
1000 version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1001 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1002 if (version_major == 2) {
1003 if (version_minor >= 1) {
1004 rlcv21_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1005
1006 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1007 le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_offset_bytes));
1008 fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_gpm_size_bytes);
1009 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLCG_SCRATCH,
1010 fw_data, fw_size);
1011
1012 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1013 le32_to_cpu(rlcv21_hdr->save_restore_list_srm_offset_bytes));
1014 fw_size = le32_to_cpu(rlcv21_hdr->save_restore_list_srm_size_bytes);
1015 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLC_SRM_ARAM,
1016 fw_data, fw_size);
1017 }
1018 if (version_minor >= 2) {
1019 rlcv22_hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1020
1021 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1022 le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_offset_bytes));
1023 fw_size = le32_to_cpu(rlcv22_hdr->rlc_iram_ucode_size_bytes);
1024 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_UCODE,
1025 fw_data, fw_size);
1026
1027 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1028 le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_offset_bytes));
1029 fw_size = le32_to_cpu(rlcv22_hdr->rlc_dram_ucode_size_bytes);
1030 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT,
1031 fw_data, fw_size);
1032 }
1033 }
1034 }
1035
1036 static void
1037 gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode(struct amdgpu_device *adev)
1038 {
1039 const __le32 *fw_data;
1040 uint32_t fw_size;
1041 const struct sdma_firmware_header_v3_0 *sdma_hdr;
1042
1043 if (adev->sdma.instance[0].fw) {
1044 sdma_hdr = (const struct sdma_firmware_header_v3_0 *)
1045 adev->sdma.instance[0].fw->data;
1046 fw_data = (const __le32 *) (adev->sdma.instance[0].fw->data +
1047 le32_to_cpu(sdma_hdr->ucode_offset_bytes));
1048 fw_size = le32_to_cpu(sdma_hdr->ucode_size_bytes);
1049
1050 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, SOC24_FIRMWARE_ID_SDMA_UCODE_TH0,
1051 fw_data, fw_size);
1052 }
1053 }
1054
1055 static void
1056 gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode(struct amdgpu_device *adev)
1057 {
1058 const __le32 *fw_data;
1059 unsigned fw_size;
1060 const struct mes_firmware_header_v1_0 *mes_hdr;
1061 int pipe, ucode_id, data_id;
1062
1063 for (pipe = 0; pipe < 2; pipe++) {
1064 if (pipe == 0) {
1065 ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P0;
1066 data_id = SOC24_FIRMWARE_ID_RS64_MES_P0_STACK;
1067 } else {
1068 ucode_id = SOC24_FIRMWARE_ID_RS64_MES_P1;
1069 data_id = SOC24_FIRMWARE_ID_RS64_MES_P1_STACK;
1070 }
1071
1072 mes_hdr = (const struct mes_firmware_header_v1_0 *)
1073 adev->mes.fw[pipe]->data;
1074
1075 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1076 le32_to_cpu(mes_hdr->mes_ucode_offset_bytes));
1077 fw_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
1078
1079 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, ucode_id, fw_data, fw_size);
1080
1081 fw_data = (const __le32 *)(adev->mes.fw[pipe]->data +
1082 le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes));
1083 fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes);
1084
1085 gfx_v12_1_rlc_backdoor_autoload_copy_ucode(adev, data_id, fw_data, fw_size);
1086 }
1087 }
1088
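/*
 * RLC backdoor autoload: stage the SDMA, CP/RLC, MES and TOC images into
 * the autoload buffer, program each XCC's RLC bootloader address/size in
 * the IMU registers, load the IMU firmware, then un-halt the RLC threads
 * to start the autoload.
 */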
1089 static int gfx_v12_1_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
1090 {
1091 uint32_t rlc_g_offset, rlc_g_size;
1092 uint64_t gpu_addr;
1093 uint32_t data;
1094 int i, num_xcc;
1095
1096 /* RLC autoload sequence 2: copy ucode */
1097 gfx_v12_1_rlc_backdoor_autoload_copy_sdma_ucode(adev);
1098 gfx_v12_1_rlc_backdoor_autoload_copy_gfx_ucode(adev);
1099 gfx_v12_1_rlc_backdoor_autoload_copy_mes_ucode(adev);
1100 gfx_v12_1_rlc_backdoor_autoload_copy_toc_ucode(adev);
1101
1102 rlc_g_offset = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].offset;
1103 rlc_g_size = rlc_autoload_info[SOC24_FIRMWARE_ID_RLC_G_UCODE].size;
1104 gpu_addr = adev->gfx.rlc.rlc_autoload_gpu_addr + rlc_g_offset - adev->gmc.vram_start;
1105
1106 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1107 for (i = 0; i < num_xcc; i++) {
1108 WREG32_SOC15(GC, GET_INST(GC, i),
1109 regGFX_IMU_RLC_BOOTLOADER_ADDR_HI,
1110 upper_32_bits(gpu_addr));
1111 WREG32_SOC15(GC, GET_INST(GC, i),
1112 regGFX_IMU_RLC_BOOTLOADER_ADDR_LO,
1113 lower_32_bits(gpu_addr));
1114 WREG32_SOC15(GC, GET_INST(GC, i),
1115 regGFX_IMU_RLC_BOOTLOADER_SIZE,
1116 rlc_g_size);
1117 }
1118
1119 if (adev->gfx.imu.funcs) {
1120 /* RLC autoload sequence 3: load IMU fw */
1121 if (adev->gfx.imu.funcs->load_microcode)
1122 adev->gfx.imu.funcs->load_microcode(adev);
1123 }
1124
1125 /* unhalt rlc to start autoload */
1126 for (i = 0; i < num_xcc; i++) {
1127 data = RREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_THREAD_ENABLE);
1128 data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD0_ENABLE, 1);
1129 data = REG_SET_FIELD(data, RLC_GPM_THREAD_ENABLE, THREAD1_ENABLE, 1);
1130 WREG32_SOC15(GC, GET_INST(GC, i), regRLC_GPM_THREAD_ENABLE, data);
1131 WREG32_SOC15(GC, GET_INST(GC, i), regRLC_CNTL, RLC_CNTL__RLC_ENABLE_F32_MASK);
1132 }
1133
1134 return 0;
1135 }
1136
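/*
 * sw_init: register the CP/RLC interrupt sources, create the RLC, MEC and
 * (optionally) KIQ objects plus the per-XCC compute rings, and set up the
 * firmware buffers required by the configured load type.
 */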
1137 static int gfx_v12_1_sw_init(struct amdgpu_ip_block *ip_block)
1138 {
1139 int i, j, k, r, ring_id = 0;
1140 unsigned num_compute_rings;
1141 int xcc_id, num_xcc;
1142 struct amdgpu_device *adev = ip_block->adev;
1143
1144 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
1145 case IP_VERSION(12, 1, 0):
1146 adev->gfx.mec.num_mec = 1;
1147 adev->gfx.mec.num_pipe_per_mec = 4;
1148 adev->gfx.mec.num_queue_per_pipe = 8;
1149 break;
1150 default:
1151 adev->gfx.mec.num_mec = 2;
1152 adev->gfx.mec.num_pipe_per_mec = 2;
1153 adev->gfx.mec.num_queue_per_pipe = 4;
1154 break;
1155 }
1156
1157 if (adev->gfx.num_compute_rings) {
1158 /* recalculate compute rings to use based on hardware configuration */
1159 num_compute_rings = (adev->gfx.mec.num_pipe_per_mec *
1160 adev->gfx.mec.num_queue_per_pipe) / 2;
1161 adev->gfx.num_compute_rings = min(adev->gfx.num_compute_rings,
1162 num_compute_rings);
1163 }
1164
1165 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1166
1167 /* EOP Event */
1168 r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
1169 GFX_12_1_0__SRCID__CP_EOP_INTERRUPT,
1170 &adev->gfx.eop_irq);
1171 if (r)
1172 return r;
1173
1174 /* Privileged reg */
1175 r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
1176 GFX_12_1_0__SRCID__CP_PRIV_REG_FAULT,
1177 &adev->gfx.priv_reg_irq);
1178 if (r)
1179 return r;
1180
1181 /* Privileged inst */
1182 r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_GRBM_CP,
1183 GFX_12_1_0__SRCID__CP_PRIV_INSTR_FAULT,
1184 &adev->gfx.priv_inst_irq);
1185 if (r)
1186 return r;
1187
1188 /* RLC POISON Error */
1189 r = amdgpu_irq_add_id(adev, SOC_V1_0_IH_CLIENTID_RLC,
1190 GFX_12_1_0__SRCID__RLC_POISON_INTERRUPT,
1191 &adev->gfx.rlc_poison_irq);
1192 if (r)
1193 return r;
1194
1195 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1196
1197 r = gfx_v12_1_rlc_init(adev);
1198 if (r) {
1199 dev_err(adev->dev, "Failed to init rlc BOs!\n");
1200 return r;
1201 }
1202
1203 r = gfx_v12_1_mec_init(adev);
1204 if (r) {
1205 dev_err(adev->dev, "Failed to init MEC BOs!\n");
1206 return r;
1207 }
1208
1209 /* set up the compute queues - allocate horizontally across pipes */
1210 for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) {
1211 ring_id = 0;
1212 for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1213 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1214 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1215 if (!amdgpu_gfx_is_mec_queue_enabled(adev,
1216 xcc_id, i, k, j))
1217 continue;
1218
1219 r = gfx_v12_1_compute_ring_init(adev, ring_id,
1220 xcc_id, i, k, j);
1221 if (r)
1222 return r;
1223
1224 ring_id++;
1225 }
1226 }
1227 }
1228
1229 if (!adev->enable_mes_kiq) {
1230 r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, xcc_id);
1231 if (r) {
1232 dev_err(adev->dev, "Failed to init KIQ BOs!\n");
1233 return r;
1234 }
1235
1236 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
1237 if (r)
1238 return r;
1239 }
1240
1241 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v12_1_compute_mqd), xcc_id);
1242 if (r)
1243 return r;
1244 }
1245
1246 /* allocate visible FB for rlc auto-loading fw */
1247 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1248 r = gfx_v12_1_rlc_autoload_buffer_init(adev);
1249 if (r)
1250 return r;
1251 } else if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1252 r = gfx_v12_1_init_cp_compute_microcode_bo(adev);
1253 if (r)
1254 return r;
1255 }
1256
1257 r = gfx_v12_1_gpu_early_init(adev);
1258 if (r)
1259 return r;
1260
1261 r = amdgpu_gfx_sysfs_init(adev);
1262 if (r)
1263 return r;
1264
1265 return 0;
1266 }
1267
1268 static void gfx_v12_1_rlc_autoload_buffer_fini(struct amdgpu_device *adev)
1269 {
1270 amdgpu_bo_free_kernel(&adev->gfx.rlc.rlc_autoload_bo,
1271 &adev->gfx.rlc.rlc_autoload_gpu_addr,
1272 (void **)&adev->gfx.rlc.rlc_autoload_ptr);
1273 }
1274
1275 static int gfx_v12_1_sw_fini(struct amdgpu_ip_block *ip_block)
1276 {
1277 int i, num_xcc;
1278 struct amdgpu_device *adev = ip_block->adev;
1279
1280 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1281 for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++)
1282 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1283
1284 for (i = 0; i < num_xcc; i++) {
1285 amdgpu_gfx_mqd_sw_fini(adev, i);
1286
1287 if (!adev->enable_mes_kiq) {
1288 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring);
1289 amdgpu_gfx_kiq_fini(adev, i);
1290 }
1291 }
1292
1293 gfx_v12_1_rlc_fini(adev);
1294 gfx_v12_1_mec_fini(adev);
1295
1296 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
1297 gfx_v12_1_rlc_autoload_buffer_fini(adev);
1298
1299 gfx_v12_1_free_microcode(adev);
1300 amdgpu_gfx_sysfs_fini(adev);
1301
1302 return 0;
1303 }
1304
1305 static void gfx_v12_1_xcc_select_se_sh(struct amdgpu_device *adev, u32 se_num,
1306 u32 sh_num, u32 instance, int xcc_id)
1307 {
1308 u32 data;
1309
1310 if (instance == 0xffffffff)
1311 data = REG_SET_FIELD(0, GRBM_GFX_INDEX,
1312 INSTANCE_BROADCAST_WRITES, 1);
1313 else
1314 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX,
1315 instance);
1316
1317 if (se_num == 0xffffffff)
1318 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES,
1319 1);
1320 else
1321 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1322
1323 if (sh_num == 0xffffffff)
1324 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES,
1325 1);
1326 else
1327 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX, sh_num);
1328
1329 WREG32_SOC15_RLC_SHADOW_EX(reg, GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX, data);
1330 }
1331
1332 static u32 gfx_v12_1_get_sa_active_bitmap(struct amdgpu_device *adev,
1333 int xcc_id)
1334 {
1335 u32 gc_disabled_sa_mask, gc_user_disabled_sa_mask, sa_mask;
1336
1337 gc_disabled_sa_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SA_UNIT_DISABLE);
1338 gc_disabled_sa_mask = REG_GET_FIELD(gc_disabled_sa_mask,
1339 CC_GC_SA_UNIT_DISABLE,
1340 SA_DISABLE);
1341 gc_user_disabled_sa_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SA_UNIT_DISABLE);
1342 gc_user_disabled_sa_mask = REG_GET_FIELD(gc_user_disabled_sa_mask,
1343 GC_USER_SA_UNIT_DISABLE,
1344 SA_DISABLE);
1345 sa_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_sh_per_se *
1346 adev->gfx.config.max_shader_engines);
1347
1348 return sa_mask & (~(gc_disabled_sa_mask | gc_user_disabled_sa_mask));
1349 }
1350
1351 static u32 gfx_v12_1_get_rb_active_bitmap(struct amdgpu_device *adev,
1352 int xcc_id)
1353 {
1354 u32 gc_disabled_rb_mask, gc_user_disabled_rb_mask;
1355 u32 rb_mask;
1356
1357 gc_disabled_rb_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
1358 regCC_RB_BACKEND_DISABLE);
1359 gc_disabled_rb_mask = REG_GET_FIELD(gc_disabled_rb_mask,
1360 CC_RB_BACKEND_DISABLE,
1361 BACKEND_DISABLE);
1362 gc_user_disabled_rb_mask = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
1363 regGC_USER_RB_BACKEND_DISABLE);
1364 gc_user_disabled_rb_mask = REG_GET_FIELD(gc_user_disabled_rb_mask,
1365 GC_USER_RB_BACKEND_DISABLE,
1366 BACKEND_DISABLE);
1367 rb_mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se *
1368 adev->gfx.config.max_shader_engines);
1369
1370 return rb_mask & (~(gc_disabled_rb_mask | gc_user_disabled_rb_mask));
1371 }
1372
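/*
 * Derive the active render-backend mask from the SA/RB disable fuses of
 * each XCC and record the result in adev->gfx.config.
 */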
1373 static void gfx_v12_1_setup_rb(struct amdgpu_device *adev)
1374 {
1375 u32 rb_bitmap_width_per_sa;
1376 u32 max_sa;
1377 u32 active_sa_bitmap;
1378 u32 global_active_rb_bitmap;
1379 u32 active_rb_bitmap = 0;
1380 u32 i;
1381 int xcc_id;
1382
1383 for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
1384 /* query sa bitmap from SA_UNIT_DISABLE registers */
1385 active_sa_bitmap = gfx_v12_1_get_sa_active_bitmap(adev, xcc_id);
1386 /* query rb bitmap from RB_BACKEND_DISABLE registers */
1387 global_active_rb_bitmap = gfx_v12_1_get_rb_active_bitmap(adev, xcc_id);
1388
1389 /* generate active rb bitmap according to active sa bitmap */
1390 max_sa = adev->gfx.config.max_shader_engines *
1391 adev->gfx.config.max_sh_per_se;
1392 rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se /
1393 adev->gfx.config.max_sh_per_se;
1394 for (i = 0; i < max_sa; i++) {
1395 if (active_sa_bitmap & (1 << i))
1396 active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
1397 }
1398
1399 active_rb_bitmap |= global_active_rb_bitmap;
1400 }
1401
1402 adev->gfx.config.backend_enable_mask = active_rb_bitmap;
1403 adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
1404 }
1405
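/*
 * Set up the compute (KFD) VMIDs on one XCC: program SH_MEM_BASES with the
 * private/shared aperture bases, enable the trap handler and disable the
 * VGPR deallocation instruction for each of those VMIDs.
 */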
1406 static void gfx_v12_1_xcc_init_compute_vmid(struct amdgpu_device *adev,
1407 int xcc_id)
1408 {
1409 int i;
1410 uint32_t sh_mem_bases;
1411 uint32_t data;
1412
1413 /*
1414 * Configure apertures:
1415 * LDS: 0x20000000'00000000 - 0x20000001'00000000 (4GB)
1416 * Scratch: 0x10000000'00000000 - 0x11ffffff'ffffffff (128PB 57-bit)
1417 */
1418 sh_mem_bases = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1419 (adev->gmc.private_aperture_start >> 58));
1420 sh_mem_bases = REG_SET_FIELD(sh_mem_bases, SH_MEM_BASES, SHARED_BASE,
1421 (adev->gmc.shared_aperture_start >> 48));
1422
1423 mutex_lock(&adev->srbm_mutex);
1424 for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
1425 soc_v1_0_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
1426 /* CP and shaders */
1427 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1428 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases);
1429
1430 /* Enable trap for each kfd vmid. */
1431 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL);
1432 data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
1433 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL, data);
1434
1435 /* Disable VGPR deallocation instruction for each KFD vmid. */
1436 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_DEBUG);
1437 data = REG_SET_FIELD(data, SQ_DEBUG, DISABLE_VGPR_DEALLOC, 1);
1438 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_DEBUG, data);
1439 }
1440 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1441 mutex_unlock(&adev->srbm_mutex);
1442 }
1443
1444 static void gfx_v12_1_tcp_harvest(struct amdgpu_device *adev)
1445 {
1446 /* TODO: harvest feature to be added later. */
1447 }
1448
1449 static void gfx_v12_1_get_tcc_info(struct amdgpu_device *adev)
1450 {
1451 }
1452
1453 static void gfx_v12_1_xcc_constants_init(struct amdgpu_device *adev,
1454 int xcc_id)
1455 {
1456 u32 tmp;
1457 int i;
1458
1459 /* XXX SH_MEM regs */
1460 /* where to put LDS, scratch, GPUVM in FSA64 space */
1461 mutex_lock(&adev->srbm_mutex);
1462 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) {
1463 soc_v1_0_grbm_select(adev, 0, 0, 0, i, GET_INST(GC, xcc_id));
1464 /* CP and shaders */
1465 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1466 regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
1467 if (i != 0) {
1468 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1469 (adev->gmc.private_aperture_start >> 58));
1470 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1471 (adev->gmc.shared_aperture_start >> 48));
1472 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, tmp);
1473 }
1474 }
1475 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1476
1477 mutex_unlock(&adev->srbm_mutex);
1478
1479 gfx_v12_1_xcc_init_compute_vmid(adev, xcc_id);
1480 }
1481
1482 static void gfx_v12_1_constants_init(struct amdgpu_device *adev)
1483 {
1484 int i, num_xcc;
1485
1486 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1487
1488 gfx_v12_1_setup_rb(adev);
1489 gfx_v12_1_get_cu_info(adev, &adev->gfx.cu_info);
1490 gfx_v12_1_get_tcc_info(adev);
1491 adev->gfx.config.pa_sc_tile_steering_override = 0;
1492
1493 for (i = 0; i < num_xcc; i++)
1494 gfx_v12_1_xcc_constants_init(adev, i);
1495 }
1496
1497 static void gfx_v12_1_xcc_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1498 bool enable, int xcc_id)
1499 {
1500 u32 tmp;
1501
1502 if (amdgpu_sriov_vf(adev))
1503 return;
1504
1505 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0);
1506
1507 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE,
1508 enable ? 1 : 0);
1509 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE,
1510 enable ? 1 : 0);
1511 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE,
1512 enable ? 1 : 0);
1513 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE,
1514 enable ? 1 : 0);
1515
1516 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL_RING0, tmp);
1517 }
1518
1519 static int gfx_v12_1_xcc_init_csb(struct amdgpu_device *adev,
1520 int xcc_id)
1521 {
1522 adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
1523
1524 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_HI,
1525 adev->gfx.rlc.clear_state_gpu_addr >> 32);
1526 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CSIB_ADDR_LO,
1527 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
1528 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1529 regRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size);
1530
1531 return 0;
1532 }
1533
1534 static void gfx_v12_1_xcc_rlc_stop(struct amdgpu_device *adev,
1535 int xcc_id)
1536 {
1537 u32 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CNTL);
1538
1539 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1540 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CNTL, tmp);
1541 }
1542
1543 static void gfx_v12_1_rlc_stop(struct amdgpu_device *adev)
1544 {
1545 int i, num_xcc;
1546
1547 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1548 for (i = 0; i < num_xcc; i++)
1549 gfx_v12_1_xcc_rlc_stop(adev, i);
1550 }
1551
1552 static void gfx_v12_1_xcc_rlc_reset(struct amdgpu_device *adev,
1553 int xcc_id)
1554 {
1555 WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id),
1556 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1557 udelay(50);
1558 WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id),
1559 GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1560 udelay(50);
1561 }
1562
1563 static void gfx_v12_1_rlc_reset(struct amdgpu_device *adev)
1564 {
1565 int i, num_xcc;
1566
1567 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1568 for (i = 0; i < num_xcc; i++)
1569 gfx_v12_1_xcc_rlc_reset(adev, i);
1570 }
1571
1572 static void gfx_v12_1_xcc_rlc_smu_handshake_cntl(struct amdgpu_device *adev,
1573 bool enable, int xcc_id)
1574 {
1575 uint32_t rlc_pg_cntl;
1576
1577 rlc_pg_cntl = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL);
1578
1579 if (!enable) {
1580 /* RLC_PG_CNTL[23] = 0 (default)
1581 * RLC will wait for handshake acks with SMU
1582 * GFXOFF will be enabled
1583 * RLC_PG_CNTL[23] = 1
1584 * RLC will not issue any message to SMU
1585 * hence no handshake between SMU & RLC
1586 * GFXOFF will be disabled
1587 */
1588 rlc_pg_cntl |= RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1589 } else
1590 rlc_pg_cntl &= ~RLC_PG_CNTL__SMU_HANDSHAKE_DISABLE_MASK;
1591 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL, rlc_pg_cntl);
1592 }
1593
1594 static void gfx_v12_1_xcc_rlc_start(struct amdgpu_device *adev,
1595 int xcc_id)
1596 {
1597 /* TODO: re-enable the RLC & SMU handshake once the SMU
1598 * and GFXOFF features work as expected */
1599 if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK))
1600 gfx_v12_1_xcc_rlc_smu_handshake_cntl(adev, false, xcc_id);
1601
1602 WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), RLC_CNTL, RLC_ENABLE_F32, 1);
1603 udelay(50);
1604 }
1605
1606 static void gfx_v12_1_rlc_start(struct amdgpu_device *adev)
1607 {
1608 int i, num_xcc;
1609
1610 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1611 for (i = 0; i < num_xcc; i++) {
1612 gfx_v12_1_xcc_rlc_start(adev, i);
1613 }
1614 }
1615
1616 static void gfx_v12_1_xcc_rlc_enable_srm(struct amdgpu_device *adev,
1617 int xcc_id)
1618 {
1619 uint32_t tmp;
1620
1621 /* enable Save Restore Machine */
1622 tmp = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SRM_CNTL));
1623 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
1624 tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
1625 WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SRM_CNTL), tmp);
1626 }
1627
1628 static void gfx_v12_1_xcc_load_rlcg_microcode(struct amdgpu_device *adev,
1629 int xcc_id)
1630 {
1631 const struct rlc_firmware_header_v2_0 *hdr;
1632 const __le32 *fw_data;
1633 unsigned i, fw_size;
1634
1635 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1636 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1637 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1638 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1639
1640 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_GPM_UCODE_ADDR,
1641 RLCG_UCODE_LOADING_START_ADDRESS);
1642
1643 for (i = 0; i < fw_size; i++)
1644 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1645 regRLC_GPM_UCODE_DATA,
1646 le32_to_cpup(fw_data++));
1647
1648 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1649 regRLC_GPM_UCODE_ADDR,
1650 adev->gfx.rlc_fw_version);
1651 }
1652
1653 static void gfx_v12_1_xcc_load_rlc_iram_dram_microcode(struct amdgpu_device *adev,
1654 int xcc_id)
1655 {
1656 const struct rlc_firmware_header_v2_2 *hdr;
1657 const __le32 *fw_data;
1658 unsigned i, fw_size;
1659 u32 tmp;
1660
1661 hdr = (const struct rlc_firmware_header_v2_2 *)adev->gfx.rlc_fw->data;
1662
1663 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1664 le32_to_cpu(hdr->rlc_iram_ucode_offset_bytes));
1665 fw_size = le32_to_cpu(hdr->rlc_iram_ucode_size_bytes) / 4;
1666
1667 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_IRAM_ADDR, 0);
1668
1669 for (i = 0; i < fw_size; i++) {
1670 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1671 msleep(1);
1672 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1673 regRLC_LX6_IRAM_DATA,
1674 le32_to_cpup(fw_data++));
1675 }
1676
1677 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1678 regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1679
1680 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1681 le32_to_cpu(hdr->rlc_dram_ucode_offset_bytes));
1682 fw_size = le32_to_cpu(hdr->rlc_dram_ucode_size_bytes) / 4;
1683
1684 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1685 regRLC_LX6_DRAM_ADDR, 0);
1686 for (i = 0; i < fw_size; i++) {
1687 if ((amdgpu_emu_mode == 1) && (i % 100 == 99))
1688 msleep(1);
1689 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1690 regRLC_LX6_DRAM_DATA,
1691 le32_to_cpup(fw_data++));
1692 }
1693
1694 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
1695 regRLC_LX6_IRAM_ADDR, adev->gfx.rlc_fw_version);
1696
1697 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_CNTL);
1698 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, PDEBUG_ENABLE, 1);
1699 tmp = REG_SET_FIELD(tmp, RLC_LX6_CNTL, BRESET, 0);
1700 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_LX6_CNTL, tmp);
1701 }
1702
1703 static int gfx_v12_1_xcc_rlc_load_microcode(struct amdgpu_device *adev,
1704 int xcc_id)
1705 {
1706 const struct rlc_firmware_header_v2_0 *hdr;
1707 uint16_t version_major;
1708 uint16_t version_minor;
1709
1710 if (!adev->gfx.rlc_fw)
1711 return -EINVAL;
1712
1713 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1714 amdgpu_ucode_print_rlc_hdr(&hdr->header);
1715
1716 version_major = le16_to_cpu(hdr->header.header_version_major);
1717 version_minor = le16_to_cpu(hdr->header.header_version_minor);
1718
1719 if (version_major == 2) {
1720 gfx_v12_1_xcc_load_rlcg_microcode(adev, xcc_id);
1721 if (amdgpu_dpm == 1) {
1722 if (version_minor >= 2)
1723 gfx_v12_1_xcc_load_rlc_iram_dram_microcode(adev, xcc_id);
1724 }
1725
1726 return 0;
1727 }
1728
1729 return -EINVAL;
1730 }
1731
1732 static int gfx_v12_1_xcc_rlc_resume(struct amdgpu_device *adev,
1733 int xcc_id)
1734 {
1735 int r;
1736
1737 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1738 gfx_v12_1_xcc_init_csb(adev, xcc_id);
1739
1740 if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */
1741 gfx_v12_1_xcc_rlc_enable_srm(adev, xcc_id);
1742 } else {
1743 if (amdgpu_sriov_vf(adev)) {
1744 gfx_v12_1_xcc_init_csb(adev, xcc_id);
1745 return 0;
1746 }
1747
1748 gfx_v12_1_xcc_rlc_stop(adev, xcc_id);
1749
1750 /* disable CG */
1751 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, 0);
1752
1753 /* disable PG */
1754 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_PG_CNTL, 0);
1755
1756 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
1757 /* legacy rlc firmware loading */
1758 r = gfx_v12_1_xcc_rlc_load_microcode(adev, xcc_id);
1759 if (r)
1760 return r;
1761 }
1762
1763 gfx_v12_1_xcc_init_csb(adev, xcc_id);
1764
1765 gfx_v12_1_xcc_rlc_start(adev, xcc_id);
1766 }
1767
1768 return 0;
1769 }
1770
1771 static int gfx_v12_1_rlc_resume(struct amdgpu_device *adev)
1772 {
1773 int r, i, num_xcc;
1774
1775 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1776 for (i = 0; i < num_xcc; i++) {
1777 r = gfx_v12_1_xcc_rlc_resume(adev, i);
1778 if (r)
1779 return r;
1780 }
1781
1782 return 0;
1783 }
1784
1785 static void gfx_v12_1_xcc_config_gfx_rs64(struct amdgpu_device *adev,
1786 int xcc_id)
1787 {
1788 const struct gfx_firmware_header_v2_0 *mec_hdr;
1789 uint32_t pipe_id, tmp;
1790
1791 mec_hdr = (const struct gfx_firmware_header_v2_0 *)
1792 adev->gfx.mec_fw->data;
1793
1794 /* config mec program start addr */
1795 for (pipe_id = 0; pipe_id < 4; pipe_id++) {
1796 soc_v1_0_grbm_select(adev, 1, pipe_id, 0, 0, GET_INST(GC, xcc_id));
1797 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START,
1798 mec_hdr->ucode_start_addr_lo >> 2 |
1799 mec_hdr->ucode_start_addr_hi << 30);
1800 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START_HI,
1801 mec_hdr->ucode_start_addr_hi >> 2);
1802 }
1803 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1804
1805 /* reset mec pipe */
1806 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL);
1807 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 1);
1808 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 1);
1809 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 1);
1810 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 1);
1811 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, tmp);
1812
1813 /* clear mec pipe reset */
1814 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET, 0);
1815 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET, 0);
1816 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET, 0);
1817 tmp = REG_SET_FIELD(tmp, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET, 0);
1818 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, tmp);
1819 }
1820
1821 static void gfx_v12_1_config_gfx_rs64(struct amdgpu_device *adev)
1822 {
1823 int i, num_xcc;
1824
1825 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
1826
1827 for (i = 0; i < num_xcc; i++)
1828 gfx_v12_1_xcc_config_gfx_rs64(adev, i);
1829 }
1830
1831 static void gfx_v12_1_xcc_set_mec_ucode_start_addr(struct amdgpu_device *adev,
1832 int xcc_id)
1833 {
1834 const struct gfx_firmware_header_v2_0 *cp_hdr;
1835 unsigned pipe_id;
1836
1837 cp_hdr = (const struct gfx_firmware_header_v2_0 *)
1838 adev->gfx.mec_fw->data;
1839 mutex_lock(&adev->srbm_mutex);
1840 for (pipe_id = 0; pipe_id < adev->gfx.mec.num_pipe_per_mec; pipe_id++) {
1841 soc_v1_0_grbm_select(adev, 1, pipe_id, 0, 0, GET_INST(GC, xcc_id));
1842 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START,
1843 cp_hdr->ucode_start_addr_lo >> 2 |
1844 cp_hdr->ucode_start_addr_hi << 30);
1845 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_PRGRM_CNTR_START_HI,
1846 cp_hdr->ucode_start_addr_hi >> 2);
1847 }
1848 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
1849 mutex_unlock(&adev->srbm_mutex);
1850 }
1851
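/*
 * Poll CP_STAT and RLC_RLCS_BOOTLOAD_STATUS until the backdoor-autoloaded
 * GC firmware reports completion for this XCC, then (for backdoor autoload)
 * program the MEC ucode start address.
 */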
1852 static int gfx_v12_1_xcc_wait_for_rlc_autoload_complete(struct amdgpu_device *adev,
1853 int xcc_id)
1854 {
1855 uint32_t cp_status;
1856 uint32_t bootload_status;
1857 int i;
1858
1859 for (i = 0; i < adev->usec_timeout; i++) {
1860 cp_status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_STAT);
1861 bootload_status = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
1862 regRLC_RLCS_BOOTLOAD_STATUS);
1863
1864 if ((cp_status == 0) &&
1865 (REG_GET_FIELD(bootload_status,
1866 RLC_RLCS_BOOTLOAD_STATUS, BOOTLOAD_COMPLETE) == 1)) {
1867 break;
1868 }
1869 udelay(1);
1870 if (amdgpu_emu_mode)
1871 msleep(10);
1872 }
1873
1874 if (i >= adev->usec_timeout) {
1875 dev_err(adev->dev,
1876 "rlc autoload: xcc%d gc ucode autoload timeout\n", xcc_id);
1877 return -ETIMEDOUT;
1878 }
1879
1880 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
1881 gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id);
1882 }
1883
1884 return 0;
1885 }
1886
1887 static int gfx_v12_1_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
1888 {
1889 int xcc_id;
1890
1891 for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++)
1892 gfx_v12_1_xcc_wait_for_rlc_autoload_complete(adev, xcc_id);
1893
1894 return 0;
1895 }
1896
1897 static void gfx_v12_1_xcc_cp_compute_enable(struct amdgpu_device *adev,
1898 bool enable, int xcc_id)
1899 {
1900 u32 data;
1901
1902 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL);
1903 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_INVALIDATE_ICACHE,
1904 enable ? 0 : 1);
1905 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_RESET,
1906 enable ? 0 : 1);
1907 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_RESET,
1908 enable ? 0 : 1);
1909 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_RESET,
1910 enable ? 0 : 1);
1911 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_RESET,
1912 enable ? 0 : 1);
1913 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE0_ACTIVE,
1914 enable ? 1 : 0);
1915 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE1_ACTIVE,
1916 enable ? 1 : 0);
1917 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE2_ACTIVE,
1918 enable ? 1 : 0);
1919 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_PIPE3_ACTIVE,
1920 enable ? 1 : 0);
1921 data = REG_SET_FIELD(data, CP_MEC_RS64_CNTL, MEC_HALT,
1922 enable ? 0 : 1);
1923 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_RS64_CNTL, data);
1924
1925 adev->gfx.kiq[xcc_id].ring.sched.ready = enable;
1926
1927 udelay(50);
1928 }
1929
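/*
 * Create the MEC firmware BOs used for direct (non-PSP) RS64 loading: one
 * VRAM BO holding the shared instruction ucode, and one data BO containing
 * a 64KB-aligned copy of the data segment for each pipe of each XCC.
 */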
1930 static int gfx_v12_1_init_cp_compute_microcode_bo(struct amdgpu_device *adev)
1931 {
1932 const struct gfx_firmware_header_v2_0 *mec_hdr;
1933 const __le32 *fw_ucode, *fw_data;
1934 u32 fw_ucode_size, fw_data_size;
1935 u32 *fw_ucode_ptr, *fw_data_ptr;
1936 int i, r, xcc_id;
1937
1938 if (!adev->gfx.mec_fw)
1939 return -EINVAL;
1940
1941 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
1942 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
1943
1944 fw_ucode = (const __le32 *) (adev->gfx.mec_fw->data +
1945 le32_to_cpu(mec_hdr->ucode_offset_bytes));
1946 fw_ucode_size = le32_to_cpu(mec_hdr->ucode_size_bytes);
1947
1948 fw_data = (const __le32 *) (adev->gfx.mec_fw->data +
1949 le32_to_cpu(mec_hdr->data_offset_bytes));
1950 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
1951
1952 if (adev->gfx.mec.mec_fw_obj == NULL) {
1953 r = amdgpu_bo_create_reserved(adev, fw_ucode_size,
1954 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
1955 &adev->gfx.mec.mec_fw_obj,
1956 &adev->gfx.mec.mec_fw_gpu_addr,
1957 (void **)&fw_ucode_ptr);
1958 if (r) {
1959 dev_err(adev->dev, "(%d) failed to create mec fw ucode bo\n", r);
1960 gfx_v12_1_mec_fini(adev);
1961 return r;
1962 }
1963
1964 memcpy(fw_ucode_ptr, fw_ucode, fw_ucode_size);
1965
1966 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1967 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1968 }
1969
1970 if (adev->gfx.mec.mec_fw_data_obj == NULL) {
1971 r = amdgpu_bo_create_reserved(adev,
1972 ALIGN(fw_data_size, 64 * 1024) *
1973 adev->gfx.mec.num_pipe_per_mec * NUM_XCC(adev->gfx.xcc_mask),
1974 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM,
1975 &adev->gfx.mec.mec_fw_data_obj,
1976 &adev->gfx.mec.mec_fw_data_gpu_addr,
1977 (void **)&fw_data_ptr);
1978 if (r) {
1979 dev_err(adev->dev, "(%d) failed to create mec fw data bo\n", r);
1980 gfx_v12_1_mec_fini(adev);
1981 return r;
1982 }
1983
1984 for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
1985 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
1986 u32 offset = (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
1987 ALIGN(fw_data_size, 64 * 1024) / 4;
1988 memcpy(fw_data_ptr + offset, fw_data, fw_data_size);
1989 }
1990 }
1991
1992 amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_data_obj);
1993 amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_data_obj);
1994 }
1995
1996 return 0;
1997 }
1998
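/*
 * Direct load of the RS64 MEC microcode for one XCC: halt the MEC, point
 * each pipe's instruction/data cache bases at the firmware BOs, invalidate
 * both caches and wait for completion, then set the ucode start address.
 */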
1999 static int gfx_v12_1_xcc_cp_compute_load_microcode_rs64(struct amdgpu_device *adev,
2000 int xcc_id)
2001 {
2002 const struct gfx_firmware_header_v2_0 *mec_hdr;
2003 u32 fw_data_size;
2004 u32 tmp, i, usec_timeout = 50000; /* Wait for 50 ms */
2005
2006 if (!adev->gfx.mec_fw)
2007 return -EINVAL;
2008
2009 mec_hdr = (const struct gfx_firmware_header_v2_0 *)adev->gfx.mec_fw->data;
2010 fw_data_size = le32_to_cpu(mec_hdr->data_size_bytes);
2011
2012 gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
2013
2014 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL);
2015 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2016 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, EXE_DISABLE, 0);
2017 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2018 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_CNTL, tmp);
2019
2020 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL);
2021 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, VMID, 0);
2022 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_BASE_CNTL, CACHE_POLICY, 0);
2023 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_BASE_CNTL, tmp);
2024
2025 mutex_lock(&adev->srbm_mutex);
2026 for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
2027 soc_v1_0_grbm_select(adev, 1, i, 0, 0, GET_INST(GC, xcc_id));
2028
2029 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_LO,
2030 lower_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
2031 (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
2032 ALIGN(fw_data_size, 64 * 1024)));
2033 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_MDBASE_HI,
2034 upper_32_bits(adev->gfx.mec.mec_fw_data_gpu_addr +
2035 (xcc_id * adev->gfx.mec.num_pipe_per_mec + i) *
2036 ALIGN(fw_data_size, 64 * 1024)));
2037
2038 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_LO,
2039 lower_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2040 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_BASE_HI,
2041 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2042 }
2043 mutex_unlock(&adev->srbm_mutex);
2044 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
2045
2046 /* Trigger an invalidation of the MEC data cache */
2047 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
2048 tmp = REG_SET_FIELD(tmp, CP_MEC_DC_OP_CNTL, INVALIDATE_DCACHE, 1);
2049 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL, tmp);
2050
2051 /* Wait for invalidation complete */
2052 for (i = 0; i < usec_timeout; i++) {
2053 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DC_OP_CNTL);
2054 if (1 == REG_GET_FIELD(tmp, CP_MEC_DC_OP_CNTL,
2055 INVALIDATE_DCACHE_COMPLETE))
2056 break;
2057 udelay(1);
2058 }
2059
2060 if (i >= usec_timeout) {
2061 dev_err(adev->dev, "failed to invalidate data cache\n");
2062 return -EINVAL;
2063 }
2064
2065 /* Trigger an invalidation of the L1 instruction caches */
2066 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
2067 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_OP_CNTL, INVALIDATE_CACHE, 1);
2068 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL, tmp);
2069
2070 /* Wait for invalidation complete */
2071 for (i = 0; i < usec_timeout; i++) {
2072 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_CPC_IC_OP_CNTL);
2073 if (1 == REG_GET_FIELD(tmp, CP_CPC_IC_OP_CNTL,
2074 INVALIDATE_CACHE_COMPLETE))
2075 break;
2076 udelay(1);
2077 }
2078
2079 if (i >= usec_timeout) {
2080 dev_err(adev->dev, "failed to invalidate instruction cache\n");
2081 return -EINVAL;
2082 }
2083
2084 gfx_v12_1_xcc_set_mec_ucode_start_addr(adev, xcc_id);
2085
2086 return 0;
2087 }
2088
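/*
 * Tell the RLC scheduler which hardware queue is the KIQ. The low byte of
 * RLC_CP_SCHEDULERS is rewritten as (me << 5) | (pipe << 3) | queue, and
 * bit 7 is then set with a second write.
 */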
2089 static void gfx_v12_1_xcc_kiq_setting(struct amdgpu_ring *ring,
2090 int xcc_id)
2091 {
2092 uint32_t tmp;
2093 struct amdgpu_device *adev = ring->adev;
2094
2095 /* tell RLC which is KIQ queue */
2096 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
2097 tmp &= 0xffffff00;
2098 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2099 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
2100 tmp |= 0x80;
2101 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
2102 }
2103
2104 static void gfx_v12_1_xcc_cp_set_doorbell_range(struct amdgpu_device *adev,
2105 int xcc_id)
2106 {
2107 /* disable gfx engine doorbell range */
2108 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_DOORBELL_RANGE_LOWER, 0);
2109 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_DOORBELL_RANGE_UPPER, 0);
2110
2111 /* set compute engine doorbell range */
2112 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_LOWER,
2113 ((adev->doorbell_index.kiq +
2114 xcc_id * adev->doorbell_index.xcc_doorbell_range) *
2115 2) << 2);
2116 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MEC_DOORBELL_RANGE_UPPER,
2117 ((adev->doorbell_index.userqueue_end +
2118 xcc_id * adev->doorbell_index.xcc_doorbell_range) *
2119 2) << 2);
2120 }
2121
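/*
 * Fill a compute MQD from the generic queue properties: EOP buffer,
 * doorbell, ring base/size, rptr/wptr write-back addresses and queue
 * priority. The MQD is committed to the HQD either through the KIQ
 * map-queues path or directly via gfx_v12_1_xcc_kiq_init_register().
 */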
2122 static int gfx_v12_1_compute_mqd_init(struct amdgpu_device *adev, void *m,
2123 struct amdgpu_mqd_prop *prop)
2124 {
2125 struct v12_1_compute_mqd *mqd = m;
2126 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2127 uint32_t tmp;
2128
2129 mqd->header = 0xC0310800;
2130 mqd->compute_pipelinestat_enable = 0x00000001;
2131 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2132 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2133 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2134 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2135 mqd->compute_misc_reserved = 0x00000007;
2136
2137 eop_base_addr = prop->eop_gpu_addr >> 8;
2138 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2139 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2140
2141 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2142 tmp = regCP_HQD_EOP_CONTROL_DEFAULT;
2143 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2144 (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1));
2145
2146 mqd->cp_hqd_eop_control = tmp;
2147
2148 /* enable doorbell? */
2149 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
2150
2151 if (prop->use_doorbell) {
2152 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2153 DOORBELL_OFFSET, prop->doorbell_index);
2154 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2155 DOORBELL_EN, 1);
2156 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2157 DOORBELL_SOURCE, 0);
2158 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2159 DOORBELL_HIT, 0);
2160 } else {
2161 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2162 DOORBELL_EN, 0);
2163 }
2164
2165 mqd->cp_hqd_pq_doorbell_control = tmp;
2166
2167 /* disable the queue if it's active */
2168 mqd->cp_hqd_dequeue_request = 0;
2169 mqd->cp_hqd_pq_rptr = 0;
2170 mqd->cp_hqd_pq_wptr_lo = 0;
2171 mqd->cp_hqd_pq_wptr_hi = 0;
2172
2173 /* set the pointer to the MQD */
2174 mqd->cp_mqd_base_addr_lo = prop->mqd_gpu_addr & 0xfffffffc;
2175 mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr);
2176
2177 /* set MQD vmid to 0 */
2178 tmp = regCP_MQD_CONTROL_DEFAULT;
2179 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2180 mqd->cp_mqd_control = tmp;
2181
2182 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2183 hqd_gpu_addr = prop->hqd_base_gpu_addr >> 8;
2184 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2185 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2186
2187 /* set up the HQD, this is similar to CP_RB0_CNTL */
2188 tmp = regCP_HQD_PQ_CONTROL_DEFAULT;
2189 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2190 (order_base_2(prop->queue_size / 4) - 1));
2191 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2192 (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
2193 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2194 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
2195 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2196 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2197 mqd->cp_hqd_pq_control = tmp;
2198
2199 /* set the wb address whether it's enabled or not */
2200 wb_gpu_addr = prop->rptr_gpu_addr;
2201 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2202 mqd->cp_hqd_pq_rptr_report_addr_hi =
2203 upper_32_bits(wb_gpu_addr) & 0xffff;
2204
2205 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2206 wb_gpu_addr = prop->wptr_gpu_addr;
2207 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2208 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2209
2210 tmp = 0;
2211 /* enable the doorbell if requested */
2212 if (prop->use_doorbell) {
2213 tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT;
2214 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2215 DOORBELL_OFFSET, prop->doorbell_index);
2216
2217 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2218 DOORBELL_EN, 1);
2219 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2220 DOORBELL_SOURCE, 0);
2221 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2222 DOORBELL_HIT, 0);
2223 }
2224
2225 mqd->cp_hqd_pq_doorbell_control = tmp;
2226
2227 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2228 mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT;
2229
2230 /* set the vmid for the queue */
2231 mqd->cp_hqd_vmid = 0;
2232
2233 tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT;
2234 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x63);
2235 mqd->cp_hqd_persistent_state = tmp;
2236
2237 /* set MIN_IB_AVAIL_SIZE */
2238 tmp = regCP_HQD_IB_CONTROL_DEFAULT;
2239 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 1);
2240 mqd->cp_hqd_ib_control = tmp;
2241
2242 /* set static priority for a compute queue/ring */
2243 mqd->cp_hqd_pipe_priority = prop->hqd_pipe_priority;
2244 mqd->cp_hqd_queue_priority = prop->hqd_queue_priority;
2245
2246 mqd->cp_mqd_stride_size = prop->mqd_stride_size ? prop->mqd_stride_size :
2247 AMDGPU_MQD_SIZE_ALIGN(adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size);
2248
2249 mqd->cp_hqd_active = prop->hqd_active;
2250
2251 return 0;
2252 }
2253
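/*
 * Program the KIQ's MQD contents into the HQD registers with direct MMIO
 * writes (the KIQ cannot map itself). The caller must hold srbm_mutex and
 * have selected the ring's me/pipe/queue through GRBM.
 */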
2254 static int gfx_v12_1_xcc_kiq_init_register(struct amdgpu_ring *ring,
2255 int xcc_id)
2256 {
2257 struct amdgpu_device *adev = ring->adev;
2258 struct v12_1_compute_mqd *mqd = ring->mqd_ptr;
2259 int j;
2260
2261 /* inactivate the queue */
2262 if (amdgpu_sriov_vf(adev))
2263 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE, 0);
2264
2265 /* disable wptr polling */
2266 WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_WPTR_POLL_CNTL, EN, 0);
2267
2268 /* write the EOP addr */
2269 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR,
2270 mqd->cp_hqd_eop_base_addr_lo);
2271 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_BASE_ADDR_HI,
2272 mqd->cp_hqd_eop_base_addr_hi);
2273
2274 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2275 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_EOP_CONTROL,
2276 mqd->cp_hqd_eop_control);
2277
2278 /* enable doorbell? */
2279 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
2280 mqd->cp_hqd_pq_doorbell_control);
2281
2282 /* disable the queue if it's active */
2283 if (RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1) {
2284 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST, 1);
2285 for (j = 0; j < adev->usec_timeout; j++) {
2286 if (!(RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE) & 1))
2287 break;
2288 udelay(1);
2289 }
2290 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_DEQUEUE_REQUEST,
2291 mqd->cp_hqd_dequeue_request);
2292 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR,
2293 mqd->cp_hqd_pq_rptr);
2294 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
2295 mqd->cp_hqd_pq_wptr_lo);
2296 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
2297 mqd->cp_hqd_pq_wptr_hi);
2298 }
2299
2300 /* set the pointer to the MQD */
2301 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR,
2302 mqd->cp_mqd_base_addr_lo);
2303 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_BASE_ADDR_HI,
2304 mqd->cp_mqd_base_addr_hi);
2305
2306 /* set MQD vmid to 0 */
2307 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_MQD_CONTROL,
2308 mqd->cp_mqd_control);
2309
2310 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2311 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE,
2312 mqd->cp_hqd_pq_base_lo);
2313 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_BASE_HI,
2314 mqd->cp_hqd_pq_base_hi);
2315
2316 /* set up the HQD, this is similar to CP_RB0_CNTL */
2317 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_CONTROL,
2318 mqd->cp_hqd_pq_control);
2319
2320 /* set the wb address whether it's enabled or not */
2321 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR,
2322 mqd->cp_hqd_pq_rptr_report_addr_lo);
2323 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
2324 mqd->cp_hqd_pq_rptr_report_addr_hi);
2325
2326 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2327 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR,
2328 mqd->cp_hqd_pq_wptr_poll_addr_lo);
2329 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_POLL_ADDR_HI,
2330 mqd->cp_hqd_pq_wptr_poll_addr_hi);
2331
2332 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL,
2333 mqd->cp_hqd_pq_doorbell_control);
2334
2335 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2336 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_LO,
2337 mqd->cp_hqd_pq_wptr_lo);
2338 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_WPTR_HI,
2339 mqd->cp_hqd_pq_wptr_hi);
2340
2341 /* set the vmid for the queue */
2342 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_VMID, mqd->cp_hqd_vmid);
2343
2344 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE,
2345 mqd->cp_hqd_persistent_state);
2346
2347 /* activate the queue */
2348 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HQD_ACTIVE,
2349 mqd->cp_hqd_active);
2350
2351 if (ring->use_doorbell)
2352 WREG32_FIELD15_PREREG(GC, GET_INST(GC, xcc_id), CP_PQ_STATUS, DOORBELL_ENABLE, 1);
2353
2354 return 0;
2355 }
2356
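/*
 * Initialize the KIQ ring's MQD and program it into the HQD. On GPU reset
 * the saved MQD backup is restored and the ring buffer is cleared instead
 * of regenerating the MQD from scratch.
 */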
2357 static int gfx_v12_1_xcc_kiq_init_queue(struct amdgpu_ring *ring,
2358 int xcc_id)
2359 {
2360 struct amdgpu_device *adev = ring->adev;
2361 struct v12_1_compute_mqd *mqd = ring->mqd_ptr;
2362
2363 gfx_v12_1_xcc_kiq_setting(ring, xcc_id);
2364
2365 if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
2366 /* reset MQD to a clean status */
2367 if (adev->gfx.kiq[xcc_id].mqd_backup)
2368 memcpy(mqd, adev->gfx.kiq[xcc_id].mqd_backup, sizeof(*mqd));
2369
2370 /* reset ring buffer */
2371 ring->wptr = 0;
2372 amdgpu_ring_clear_ring(ring);
2373
2374 mutex_lock(&adev->srbm_mutex);
2375 soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
2376 gfx_v12_1_xcc_kiq_init_register(ring, xcc_id);
2377 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
2378 mutex_unlock(&adev->srbm_mutex);
2379 } else {
2380 memset((void *)mqd, 0, sizeof(*mqd));
2381 if (amdgpu_sriov_vf(adev) && adev->in_suspend)
2382 amdgpu_ring_clear_ring(ring);
2383 mutex_lock(&adev->srbm_mutex);
2384 soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
2385 amdgpu_ring_init_mqd(ring);
2386 gfx_v12_1_xcc_kiq_init_register(ring, xcc_id);
2387 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
2388 mutex_unlock(&adev->srbm_mutex);
2389
2390 if (adev->gfx.kiq[xcc_id].mqd_backup)
2391 memcpy(adev->gfx.kiq[xcc_id].mqd_backup, mqd, sizeof(*mqd));
2392 }
2393
2394 return 0;
2395 }
2396
2397 static int gfx_v12_1_xcc_kcq_init_queue(struct amdgpu_ring *ring,
2398 int xcc_id)
2399 {
2400 struct amdgpu_device *adev = ring->adev;
2401 struct v12_1_compute_mqd *mqd = ring->mqd_ptr;
2402 int mqd_idx = ring - &adev->gfx.compute_ring[0];
2403
2404 if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
2405 memset((void *)mqd, 0, sizeof(*mqd));
2406 mutex_lock(&adev->srbm_mutex);
2407 soc_v1_0_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, GET_INST(GC, xcc_id));
2408 amdgpu_ring_init_mqd(ring);
2409 soc_v1_0_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id));
2410 mutex_unlock(&adev->srbm_mutex);
2411
2412 if (adev->gfx.mec.mqd_backup[mqd_idx])
2413 memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
2414 } else {
2415 /* restore MQD to a clean status */
2416 if (adev->gfx.mec.mqd_backup[mqd_idx])
2417 memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
2418 /* reset ring buffer */
2419 ring->wptr = 0;
2420 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0);
2421 amdgpu_ring_clear_ring(ring);
2422 }
2423
2424 return 0;
2425 }
2426
2427 static int gfx_v12_1_xcc_kiq_resume(struct amdgpu_device *adev,
2428 int xcc_id)
2429 {
2430 struct amdgpu_ring *ring;
2431 int r;
2432
2433 ring = &adev->gfx.kiq[xcc_id].ring;
2434
2435 r = amdgpu_bo_reserve(ring->mqd_obj, false);
2436 if (unlikely(r != 0))
2437 return r;
2438
2439 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2440 if (unlikely(r != 0)) {
2441 amdgpu_bo_unreserve(ring->mqd_obj);
2442 return r;
2443 }
2444
2445 gfx_v12_1_xcc_kiq_init_queue(ring, xcc_id);
2446 amdgpu_bo_kunmap(ring->mqd_obj);
2447 ring->mqd_ptr = NULL;
2448 amdgpu_bo_unreserve(ring->mqd_obj);
2449 ring->sched.ready = true;
2450 return 0;
2451 }
2452
2453 static int gfx_v12_1_xcc_kcq_resume(struct amdgpu_device *adev,
2454 int xcc_id)
2455 {
2456 struct amdgpu_ring *ring = NULL;
2457 int r = 0, i;
2458
2459 if (!amdgpu_async_gfx_ring)
2460 gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id);
2461
2462 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2463 ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
2464
2465 r = amdgpu_bo_reserve(ring->mqd_obj, false);
2466 if (unlikely(r != 0))
2467 goto done;
2468 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2469 if (!r) {
2470 r = gfx_v12_1_xcc_kcq_init_queue(ring, xcc_id);
2471 amdgpu_bo_kunmap(ring->mqd_obj);
2472 ring->mqd_ptr = NULL;
2473 }
2474 amdgpu_bo_unreserve(ring->mqd_obj);
2475 if (r)
2476 goto done;
2477 }
2478
2479 r = amdgpu_gfx_enable_kcq(adev, xcc_id);
2480 done:
2481 return r;
2482 }
2483
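/*
 * Bring up the compute CP on each XCC in the mask: optionally direct-load
 * the MEC microcode, set the doorbell ranges, enable the MEC pipes, resume
 * the KIQ (or MES KIQ) and the kernel compute queues, then ring-test every
 * compute ring.
 */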
2484 static int gfx_v12_1_xcc_cp_resume(struct amdgpu_device *adev, uint16_t xcc_mask)
2485 {
2486 int r, i, xcc_id;
2487 struct amdgpu_ring *ring;
2488
2489 for_each_inst(xcc_id, xcc_mask) {
2490 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2491 /* legacy firmware loading */
2492 r = gfx_v12_1_xcc_cp_compute_load_microcode_rs64(adev, xcc_id);
2493 if (r)
2494 return r;
2495 }
2496
2497 /* GFX CGCG and LS is set by default */
2498 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
2499 gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, true, xcc_id);
2500
2501 gfx_v12_1_xcc_cp_set_doorbell_range(adev, xcc_id);
2502
2503 gfx_v12_1_xcc_cp_compute_enable(adev, true, xcc_id);
2504
2505 if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
2506 r = amdgpu_mes_kiq_hw_init(adev, xcc_id);
2507 else
2508 r = gfx_v12_1_xcc_kiq_resume(adev, xcc_id);
2509 if (r)
2510 return r;
2511
2512 r = gfx_v12_1_xcc_kcq_resume(adev, xcc_id);
2513 if (r)
2514 return r;
2515
2516 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2517 ring = &adev->gfx.compute_ring[i + xcc_id * adev->gfx.num_compute_rings];
2518 r = amdgpu_ring_test_helper(ring);
2519 if (r)
2520 return r;
2521 }
2522 }
2523
2524 return 0;
2525 }
2526
2527 static int gfx_v12_1_cp_resume(struct amdgpu_device *adev)
2528 {
2529 int num_xcc, num_xcp, num_xcc_per_xcp;
2530 uint16_t xcc_mask;
2531 int r = 0;
2532
2533 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2534 if (amdgpu_sriov_vf(adev)) {
2535 enum amdgpu_gfx_partition mode;
2536
2537 mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
2538 AMDGPU_XCP_FL_NONE);
2539 if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
2540 return -EINVAL;
2541 if (adev->gfx.funcs &&
2542 adev->gfx.funcs->get_xccs_per_xcp) {
2543 num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev);
2544 adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp;
2545 num_xcp = num_xcc / num_xcc_per_xcp;
2546 } else {
2547 return -EINVAL;
2548 }
2549 r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode);
2550
2551 } else {
2552 if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
2553 AMDGPU_XCP_FL_NONE) ==
2554 AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
2555 r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr,
2556 amdgpu_user_partt_mode);
2557 }
2558
2559 if (r)
2560 return r;
2561
2562 xcc_mask = GENMASK(NUM_XCC(adev->gfx.xcc_mask) - 1, 0);
2563
2564 return gfx_v12_1_xcc_cp_resume(adev, xcc_mask);
2565 }
2566
2567 static int gfx_v12_1_gfxhub_enable(struct amdgpu_device *adev)
2568 {
2569 int r, i;
2570 bool value;
2571
2572 r = adev->gfxhub.funcs->gart_enable(adev);
2573 if (r)
2574 return r;
2575
2576 value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
2577 false : true;
2578
2579 adev->gfxhub.funcs->set_fault_enable_default(adev, value);
2580 /* TODO investigate why TLB flush is needed,
2581 * are we missing a flush somewhere else? */
2582 for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) {
2583 if (AMDGPU_IS_GFXHUB(i))
2584 adev->gmc.gmc_funcs->flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(i), 0);
2585 }
2586
2587 return 0;
2588 }
2589
2590 static int get_gb_addr_config(struct amdgpu_device *adev)
2591 {
2592 u32 gb_addr_config;
2593
2594 gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG_READ);
2595 if (gb_addr_config == 0)
2596 return -EINVAL;
2597
2598 adev->gfx.config.gb_addr_config_fields.num_pkrs =
2599 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG_READ, NUM_PKRS);
2600
2601 adev->gfx.config.gb_addr_config = gb_addr_config;
2602
2603 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2604 REG_GET_FIELD(adev->gfx.config.gb_addr_config,
2605 GB_ADDR_CONFIG_READ, NUM_PIPES);
2606
2607 adev->gfx.config.max_tile_pipes =
2608 adev->gfx.config.gb_addr_config_fields.num_pipes;
2609
2610 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2611 REG_GET_FIELD(adev->gfx.config.gb_addr_config,
2612 GB_ADDR_CONFIG_READ, MAX_COMPRESSED_FRAGS);
2613 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2614 REG_GET_FIELD(adev->gfx.config.gb_addr_config,
2615 GB_ADDR_CONFIG_READ, NUM_RB_PER_SE);
2616 adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2617 REG_GET_FIELD(adev->gfx.config.gb_addr_config,
2618 GB_ADDR_CONFIG_READ, NUM_SHADER_ENGINES);
2619 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2620 REG_GET_FIELD(adev->gfx.config.gb_addr_config,
2621 GB_ADDR_CONFIG_READ, PIPE_INTERLEAVE_SIZE));
2622
2623 return 0;
2624 }
2625
2626 static void gfx_v12_1_xcc_disable_gpa_mode(struct amdgpu_device *adev,
2627 int xcc_id)
2628 {
2629 uint32_t data;
2630
2631 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG);
2632 data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK;
2633 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data);
2634
2635 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG);
2636 data |= CPG_PSP_DEBUG__GPA_OVERRIDE_MASK;
2637 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPG_PSP_DEBUG, data);
2638 }
2639
2640 static void gfx_v12_1_xcc_enable_atomics(struct amdgpu_device *adev,
2641 int xcc_id)
2642 {
2643 uint32_t data;
2644
2645 /* Set the TCP UTCL0 register to enable atomics */
2646 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_UTCL0_CNTL1);
2647 data = REG_SET_FIELD(data, TCP_UTCL0_CNTL1, ATOMIC_REQUESTER_EN, 0x1);
2648
2649 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_UTCL0_CNTL1, data);
2650 }
2651
2652 static void gfx_v12_1_xcc_disable_burst(struct amdgpu_device *adev,
2653 int xcc_id)
2654 {
2655 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGL1_DRAM_BURST_CTRL, 0xf);
2656 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGLARB_DRAM_BURST_CTRL, 0xf);
2657 }
2658
2659 static void gfx_v12_1_xcc_disable_early_write_ack(struct amdgpu_device *adev,
2660 int xcc_id)
2661 {
2662 uint32_t data;
2663
2664 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL3);
2665 data = REG_SET_FIELD(data, TCP_CNTL3, DISABLE_EARLY_WRITE_ACK, 0x1);
2666
2667 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL3, data);
2668 }
2669
2670 static void gfx_v12_1_xcc_disable_tcp_spill_cache(struct amdgpu_device *adev,
2671 int xcc_id)
2672 {
2673 uint32_t data;
2674
2675 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL);
2676 data = REG_SET_FIELD(data, TCP_CNTL, TCP_SPILL_CACHE_DISABLE, 0x1);
2677
2678 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regTCP_CNTL, data);
2679 }
2680
2681 static void gfx_v12_1_init_golden_registers(struct amdgpu_device *adev)
2682 {
2683 int i;
2684
2685 for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); i++) {
2686 gfx_v12_1_xcc_disable_burst(adev, i);
2687 gfx_v12_1_xcc_enable_atomics(adev, i);
2688 gfx_v12_1_xcc_disable_early_write_ack(adev, i);
2689 gfx_v12_1_xcc_disable_tcp_spill_cache(adev, i);
2690 }
2691 }
2692
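/*
 * GFX hw_init: kick off firmware loading according to the configured load
 * type (RLC backdoor autoload, IMU direct load, or PSP), wait for the RLC
 * bootload to finish, then program gfxhub, golden and constant state and
 * resume the RLC and CP.
 */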
2693 static int gfx_v12_1_hw_init(struct amdgpu_ip_block *ip_block)
2694 {
2695 int r, i, num_xcc;
2696 struct amdgpu_device *adev = ip_block->adev;
2697
2698 if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
2699 /* rlc autoload firmware */
2700 r = gfx_v12_1_rlc_backdoor_autoload_enable(adev);
2701 if (r)
2702 return r;
2703 } else {
2704 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
2705 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2706
2707 if (adev->gfx.imu.funcs) {
2708 if (adev->gfx.imu.funcs->load_microcode)
2709 adev->gfx.imu.funcs->load_microcode(adev);
2710 }
2711
2712 for (i = 0; i < num_xcc; i++) {
2713 /* disable gpa mode in backdoor loading */
2714 gfx_v12_1_xcc_disable_gpa_mode(adev, i);
2715 }
2716 }
2717 }
2718
2719 if ((adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) ||
2720 (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) {
2721 r = gfx_v12_1_wait_for_rlc_autoload_complete(adev);
2722 if (r) {
2723 dev_err(adev->dev, "(%d) failed to wait rlc autoload complete\n", r);
2724 return r;
2725 }
2726 }
2727
2728 adev->gfx.is_poweron = true;
2729
2730 if (get_gb_addr_config(adev))
2731 DRM_WARN("Invalid gb_addr_config !\n");
2732
2733 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)
2734 gfx_v12_1_config_gfx_rs64(adev);
2735
2736 r = gfx_v12_1_gfxhub_enable(adev);
2737 if (r)
2738 return r;
2739
2740 gfx_v12_1_init_golden_registers(adev);
2741
2742 gfx_v12_1_constants_init(adev);
2743
2744 if (adev->nbio.funcs->gc_doorbell_init)
2745 adev->nbio.funcs->gc_doorbell_init(adev);
2746
2747 r = gfx_v12_1_rlc_resume(adev);
2748 if (r)
2749 return r;
2750
2751 /*
2752 * init golden registers and rlc resume may override some registers,
2753 * reconfig them here
2754 */
2755 gfx_v12_1_tcp_harvest(adev);
2756
2757 r = gfx_v12_1_cp_resume(adev);
2758 if (r)
2759 return r;
2760
2761 return r;
2762 }
2763
2764 static void gfx_v12_1_xcc_fini(struct amdgpu_device *adev,
2765 int xcc_id)
2766 {
2767 uint32_t tmp;
2768
2769 if (!adev->no_hw_access) {
2770 if (amdgpu_gfx_disable_kcq(adev, xcc_id))
2771 DRM_ERROR("KCQ disable failed\n");
2772
2773 amdgpu_mes_kiq_hw_fini(adev, xcc_id);
2774 }
2775
2776 if (amdgpu_sriov_vf(adev)) {
2777 /* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
2778 tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS);
2779 tmp &= 0xffffff00;
2780 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp);
2781 }
2782 gfx_v12_1_xcc_cp_compute_enable(adev, false, xcc_id);
2783 gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, false, xcc_id);
2784 }
2785
2786 static int gfx_v12_1_set_userq_eop_interrupts(struct amdgpu_device *adev,
2787 bool enable)
2788 {
2789 unsigned int irq_type;
2790 int m, p, r;
2791
2792 if (adev->gfx.disable_kq) {
2793 for (m = 0; m < adev->gfx.mec.num_mec; ++m) {
2794 for (p = 0; p < adev->gfx.mec.num_pipe_per_mec; p++) {
2795 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2796 + (m * adev->gfx.mec.num_pipe_per_mec)
2797 + p;
2798 if (enable)
2799 r = amdgpu_irq_get(adev, &adev->gfx.eop_irq,
2800 irq_type);
2801 else
2802 r = amdgpu_irq_put(adev, &adev->gfx.eop_irq,
2803 irq_type);
2804 if (r)
2805 return r;
2806 }
2807 }
2808 }
2809
2810 return 0;
2811 }
2812
2813 static int gfx_v12_1_hw_fini(struct amdgpu_ip_block *ip_block)
2814 {
2815 struct amdgpu_device *adev = ip_block->adev;
2816 int i, num_xcc;
2817
2818 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
2819 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
2820 gfx_v12_1_set_userq_eop_interrupts(adev, false);
2821
2822 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2823 for (i = 0; i < num_xcc; i++) {
2824 gfx_v12_1_xcc_fini(adev, i);
2825 }
2826
2827 adev->gfxhub.funcs->gart_disable(adev);
2828
2829 adev->gfx.is_poweron = false;
2830
2831 return 0;
2832 }
2833
2834 static int gfx_v12_1_suspend(struct amdgpu_ip_block *ip_block)
2835 {
2836 return gfx_v12_1_hw_fini(ip_block);
2837 }
2838
2839 static int gfx_v12_1_resume(struct amdgpu_ip_block *ip_block)
2840 {
2841 return gfx_v12_1_hw_init(ip_block);
2842 }
2843
2844 static bool gfx_v12_1_is_idle(struct amdgpu_ip_block *ip_block)
2845 {
2846 struct amdgpu_device *adev = ip_block->adev;
2847 int i, num_xcc;
2848
2849 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2850 for (i = 0; i < num_xcc; i++) {
2851 if (REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, i),
2852 regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
2853 return false;
2854 }
2855 return true;
2856 }
2857
2858 static int gfx_v12_1_wait_for_idle(struct amdgpu_ip_block *ip_block)
2859 {
2860 unsigned i;
2861 struct amdgpu_device *adev = ip_block->adev;
2862
2863 for (i = 0; i < adev->usec_timeout; i++) {
2864 if (gfx_v12_1_is_idle(ip_block))
2865 return 0;
2866 udelay(1);
2867 }
2868 return -ETIMEDOUT;
2869 }
2870
2871 static uint64_t gfx_v12_1_get_gpu_clock_counter(struct amdgpu_device *adev)
2872 {
2873 uint64_t clock = 0;
2874
2875 if (adev->smuio.funcs &&
2876 adev->smuio.funcs->get_gpu_clock_counter)
2877 clock = adev->smuio.funcs->get_gpu_clock_counter(adev);
2878 else
2879 dev_warn(adev->dev, "query gpu clock counter is not supported\n");
2880
2881 return clock;
2882 }
2883
2884 static int gfx_v12_1_early_init(struct amdgpu_ip_block *ip_block)
2885 {
2886 struct amdgpu_device *adev = ip_block->adev;
2887
2889 switch (amdgpu_user_queue) {
2890 case -1:
2891 default:
2892 adev->gfx.disable_kq = true;
2893 adev->gfx.disable_uq = true;
2894 break;
2895 case 0:
2896 adev->gfx.disable_kq = false;
2897 adev->gfx.disable_uq = true;
2898 break;
2899 }
2900
2901 adev->gfx.funcs = &gfx_v12_1_gfx_funcs;
2902
2903 if (adev->gfx.disable_kq)
2904 adev->gfx.num_compute_rings = 0;
2905 else
2906 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
2907 AMDGPU_MAX_COMPUTE_RINGS);
2908
2909 gfx_v12_1_set_kiq_pm4_funcs(adev);
2910 gfx_v12_1_set_ring_funcs(adev);
2911 gfx_v12_1_set_irq_funcs(adev);
2912 gfx_v12_1_set_rlc_funcs(adev);
2913 gfx_v12_1_set_mqd_funcs(adev);
2914 gfx_v12_1_set_imu_funcs(adev);
2915
2916 gfx_v12_1_init_rlcg_reg_access_ctrl(adev);
2917
2918 return gfx_v12_1_init_microcode(adev);
2919 }
2920
2921 static int gfx_v12_1_late_init(struct amdgpu_ip_block *ip_block)
2922 {
2923 struct amdgpu_device *adev = ip_block->adev;
2924 int r;
2925
2926 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
2927 if (r)
2928 return r;
2929
2930 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
2931 if (r)
2932 return r;
2933
2934 r = gfx_v12_1_set_userq_eop_interrupts(adev, true);
2935 if (r)
2936 return r;
2937
2938 return 0;
2939 }
2940
2941 static bool gfx_v12_1_is_rlc_enabled(struct amdgpu_device *adev)
2942 {
2943 uint32_t rlc_cntl;
2944
2945 /* if RLC is not enabled, do nothing */
2946 rlc_cntl = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CNTL);
2947 return (REG_GET_FIELD(rlc_cntl, RLC_CNTL, RLC_ENABLE_F32)) ? true : false;
2948 }
2949
2950 static void gfx_v12_1_xcc_set_safe_mode(struct amdgpu_device *adev,
2951 int xcc_id)
2952 {
2953 uint32_t data;
2954 unsigned i;
2955
2956 data = RLC_SAFE_MODE__CMD_MASK;
2957 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
2958
2959 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data);
2960
2961 /* wait for RLC_SAFE_MODE */
2962 for (i = 0; i < adev->usec_timeout; i++) {
2963 if (!REG_GET_FIELD(RREG32_SOC15(GC, GET_INST(GC, xcc_id),
2964 regRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
2965 break;
2966 udelay(1);
2967 }
2968 }
2969
2970 static void gfx_v12_1_xcc_unset_safe_mode(struct amdgpu_device *adev,
2971 int xcc_id)
2972 {
2973 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
2974 regRLC_SAFE_MODE, RLC_SAFE_MODE__CMD_MASK);
2975 }
2976
2977 static void gfx_v12_1_update_perf_clk(struct amdgpu_device *adev,
2978 bool enable)
2979 {
2980 int i, num_xcc;
2981
2982 num_xcc = NUM_XCC(adev->gfx.xcc_mask);
2983 for (i = 0; i < num_xcc; i++)
2984 gfx_v12_1_xcc_update_perf_clk(adev, enable, i);
2985 }
2986
2987 static void gfx_v12_1_update_spm_vmid(struct amdgpu_device *adev,
2988 int xcc_id,
2989 struct amdgpu_ring *ring,
2990 unsigned vmid)
2991 {
2992 u32 reg, data;
2993
2994 reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL);
2995 if (amdgpu_sriov_is_pp_one_vf(adev))
2996 data = RREG32_NO_KIQ(reg);
2997 else
2998 data = RREG32(reg);
2999
3000 data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
3001 data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
3002
3003 if (amdgpu_sriov_is_pp_one_vf(adev))
3004 WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL, data);
3005 else
3006 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL, data);
3007
3008 if (ring
3009 && amdgpu_sriov_is_pp_one_vf(adev)
3010 && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
3011 || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
3012 uint32_t reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPM_MC_CNTL);
3013 amdgpu_ring_emit_wreg(ring, reg, data);
3014 }
3015 }
3016
3017 static const struct amdgpu_rlc_funcs gfx_v12_1_rlc_funcs = {
3018 .is_rlc_enabled = gfx_v12_1_is_rlc_enabled,
3019 .set_safe_mode = gfx_v12_1_xcc_set_safe_mode,
3020 .unset_safe_mode = gfx_v12_1_xcc_unset_safe_mode,
3021 .init = gfx_v12_1_rlc_init,
3022 .get_csb_size = gfx_v12_1_get_csb_size,
3023 .get_csb_buffer = gfx_v12_1_get_csb_buffer,
3024 .resume = gfx_v12_1_rlc_resume,
3025 .stop = gfx_v12_1_rlc_stop,
3026 .reset = gfx_v12_1_rlc_reset,
3027 .start = gfx_v12_1_rlc_start,
3028 .update_spm_vmid = gfx_v12_1_update_spm_vmid,
3029 };
3030
3031 #if 0
3032 static void gfx_v12_cntl_power_gating(struct amdgpu_device *adev, bool enable)
3033 {
3034 /* TODO */
3035 }
3036
3037 static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable)
3038 {
3039 /* TODO */
3040 }
3041 #endif
3042
3043 static int gfx_v12_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
3044 enum amd_powergating_state state)
3045 {
3046 struct amdgpu_device *adev = ip_block->adev;
3047 bool enable = (state == AMD_PG_STATE_GATE);
3048
3049 if (amdgpu_sriov_vf(adev))
3050 return 0;
3051
3052 switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
3053 case IP_VERSION(12, 1, 0):
3054 amdgpu_gfx_off_ctrl(adev, enable);
3055 break;
3056 default:
3057 break;
3058 }
3059
3060 return 0;
3061 }
3062
3063 static void gfx_v12_1_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
3064 bool enable, int xcc_id)
3065 {
3066 uint32_t def, data;
3067
3068 if (!(adev->cg_flags &
3069 (AMD_CG_SUPPORT_GFX_CGCG |
3070 AMD_CG_SUPPORT_GFX_CGLS |
3071 AMD_CG_SUPPORT_GFX_3D_CGCG |
3072 AMD_CG_SUPPORT_GFX_3D_CGLS)))
3073 return;
3074
3075 if (enable) {
3076 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id),
3077 regRLC_CGTT_MGCG_OVERRIDE);
3078
3079 /* unset CGCG override */
3080 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
3081 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
3082 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3083 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
3084 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG ||
3085 adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
3086 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
3087
3088 /* update CGCG override bits */
3089 if (def != data)
3090 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
3091 regRLC_CGTT_MGCG_OVERRIDE, data);
3092
3093 /* enable cgcg FSM(0x0000363F) */
3094 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
3095
3096 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
3097 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD_MASK;
3098 data |= (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
3099 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3100 }
3101
3102 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
3103 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY_MASK;
3104 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
3105 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3106 }
3107
3108 if (def != data)
3109 WREG32_SOC15(GC, GET_INST(GC, xcc_id),
3110 regRLC_CGCG_CGLS_CTRL, data);
3111
3112 /* set IDLE_POLL_COUNT(0x00900100) */
3113 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL);
3114
3115 data &= ~CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY_MASK;
3116 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3117 data |= (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
3118 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3119
3120 if (def != data)
3121 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data);
3122
3123 data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL);
3124 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
3125 data = REG_SET_FIELD(data, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
3126 data = REG_SET_FIELD(data, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
3127 data = REG_SET_FIELD(data, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
3128 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_INT_CNTL, data);
3129 } else {
3130 /* Program RLC_CGCG_CGLS_CTRL */
3131 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL);
3132
3133 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)
3134 data &= ~RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
3135
3136 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
3137 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
3138
3139 if (def != data)
3140 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data);
3141 }
3142 }
3143
3144 static void gfx_v12_1_xcc_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3145 bool enable, int xcc_id)
3146 {
3147 uint32_t data, def;
3148 if (!(adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)))
3149 return;
3150
3151 /* It is disabled by HW by default */
3152 if (enable) {
3153 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
3154 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
3155 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
3156
3157 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3158 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
3159 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
3160
3161 if (def != data)
3162 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
3163 }
3164 } else {
3165 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
3166 def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);
3167
3168 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
3169 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
3170 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK);
3171
3172 if (def != data)
3173 WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
3174 }
3175 }
3176 }
3177
static void gfx_v12_1_xcc_update_repeater_fgcg(struct amdgpu_device *adev,
					       bool enable, int xcc_id)
{
	uint32_t def, data;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_REPEATER_FGCG))
		return;

	def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);

	if (enable)
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK);
	else
		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK |
			RLC_CGTT_MGCG_OVERRIDE__RLC_REPEATER_FGCG_OVERRIDE_MASK;

	if (def != data)
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
}

static void gfx_v12_1_xcc_update_sram_fgcg(struct amdgpu_device *adev,
					   bool enable, int xcc_id)
{
	uint32_t def, data;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_FGCG))
		return;

	def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);

	if (enable)
		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;
	else
		data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK;

	if (def != data)
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
}

static void gfx_v12_1_xcc_update_perf_clk(struct amdgpu_device *adev,
					  bool enable, int xcc_id)
{
	uint32_t def, data;

	if (!(adev->cg_flags & AMD_CG_SUPPORT_GFX_PERF_CLK))
		return;

	def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE);

	if (enable)
		data &= ~RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;
	else
		data |= RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK;

	if (def != data)
		WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data);
}

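/*
 * Apply or remove all GFX clock-gating features on one XCC instance.
 * The RLC is held in safe mode while the CGCG/MGCG/FGCG/perfmon helpers
 * reprogram their override bits; the GUI idle interrupt is then toggled
 * to match the new gating state when any gating feature is supported.
 */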
static int gfx_v12_1_xcc_update_gfx_clock_gating(struct amdgpu_device *adev,
						 bool enable, int xcc_id)
{
	amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id);

	gfx_v12_1_xcc_update_coarse_grain_clock_gating(adev, enable, xcc_id);

	gfx_v12_1_xcc_update_medium_grain_clock_gating(adev, enable, xcc_id);

	gfx_v12_1_xcc_update_repeater_fgcg(adev, enable, xcc_id);

	gfx_v12_1_xcc_update_sram_fgcg(adev, enable, xcc_id);

	gfx_v12_1_xcc_update_perf_clk(adev, enable, xcc_id);

	if (adev->cg_flags &
	    (AMD_CG_SUPPORT_GFX_MGCG |
	     AMD_CG_SUPPORT_GFX_CGLS |
	     AMD_CG_SUPPORT_GFX_CGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGCG |
	     AMD_CG_SUPPORT_GFX_3D_CGLS))
		gfx_v12_1_xcc_enable_gui_idle_interrupt(adev, enable, xcc_id);

	amdgpu_gfx_rlc_exit_safe_mode(adev, xcc_id);

	return 0;
}

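/*
 * amd_ip_funcs set_clockgating_state callback: clock gating is left
 * untouched for SR-IOV VFs, otherwise the gating update is applied to
 * every XCC instance.
 */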
static int gfx_v12_1_set_clockgating_state(struct amdgpu_ip_block *ip_block,
					   enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i, num_xcc;

	if (amdgpu_sriov_vf(adev))
		return 0;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(12, 1, 0):
		for (i = 0; i < num_xcc; i++)
			gfx_v12_1_xcc_update_gfx_clock_gating(adev,
					state == AMD_CG_STATE_GATE, i);
		break;
	default:
		break;
	}

	return 0;
}

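/*
 * Report which clock-gating features are currently active by reading back
 * the RLC MGCG override and CGCG/CGLS control registers on the first XCC.
 */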
static void gfx_v12_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
	struct amdgpu_device *adev = ip_block->adev;
	int data;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_REPEATER_FGCG */
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_REPEATER_FGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_REPEATER_FGCG;

	/* AMD_CG_SUPPORT_GFX_FGCG */
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_FGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_FGCG;

	/* AMD_CG_SUPPORT_GFX_PERF_CLK */
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__PERFMON_CLOCK_STATE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_PERF_CLK;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
}

static u64 gfx_v12_1_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
	/* the gfx12 hardware uses a 32-bit rptr */
	return *(uint32_t *)ring->rptr_cpu_addr;
}

static u64 gfx_v12_1_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	u64 wptr;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell)
		wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr);
	else
		BUG();
	return wptr;
}

static void gfx_v12_1_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
			     ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
		BUG(); /* only the doorbell method is supported on gfx12 for now */
	}
}

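/*
 * Emit an INDIRECT_BUFFER packet for a compute IB; the VMID is carried in
 * the control dword.  See the in-function comment for the optional
 * GDS_COMPUTE_MAX_WAVE_ID workaround emitted before the IB packet.
 */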
static void gfx_v12_1_ring_emit_ib_compute(struct amdgpu_ring *ring,
					   struct amdgpu_job *job,
					   struct amdgpu_ib *ib,
					   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);

	/* Currently, there is a high possibility to get wave ID mismatch
	 * between ME and GDS, leading to a hw deadlock, because ME generates
	 * different wave IDs than the GDS expects. This situation happens
	 * randomly when at least 5 compute pipes use GDS ordered append.
	 * The wave IDs generated by ME are also wrong after suspend/resume.
	 * Those are probably bugs somewhere else in the kernel driver.
	 *
	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
	 * GDS to 0 for this ring (me/pipe).
	 */
	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		amdgpu_ring_write(ring, regGDS_COMPUTE_MAX_WAVE_ID);
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}

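/*
 * Emit a RELEASE_MEM fence: flush/write back the relevant caches, write the
 * fence sequence number (32 or 64 bit, per AMDGPU_FENCE_FLAG_64BIT) to the
 * given address, and optionally raise an interrupt.
 */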
static void gfx_v12_1_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
				      u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ(1) |
				 PACKET3_RELEASE_MEM_GCR_GLV_WB |
				 PACKET3_RELEASE_MEM_GCR_GL2_WB |
				 PACKET3_RELEASE_MEM_GCR_GL2_SCOPE(2) |
				 PACKET3_RELEASE_MEM_TEMPORAL(3) |
				 PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
	amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
				 PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));

	/*
	 * the address must be qword aligned for a 64-bit write, and dword
	 * aligned when only the low 32 bits are written (data high is discarded)
	 */
	if (write64bit)
		BUG_ON(addr & 0x7);
	else
		BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v12_1_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	gfx_v12_1_wait_reg_mem(ring, 0, 1, 0, lower_32_bits(addr),
			       upper_32_bits(addr), seq, 0xffffffff, 4);
}

static void gfx_v12_1_ring_invalidate_tlbs(struct amdgpu_ring *ring,
					   uint16_t pasid, uint32_t flush_type,
					   bool all_hub, uint8_t dst_sel)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
	amdgpu_ring_write(ring,
			  PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
			  PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
			  PACKET3_INVALIDATE_TLBS_PASID(pasid) |
			  PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

static void gfx_v12_1_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

	/* compute doesn't have PFP */
	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

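/*
 * KIQ fence: the sequence number is written with WRITE_DATA (32-bit only,
 * since only 32 bits of writeback space are allocated per fence), followed
 * by a write to CPC_INT_STATUS to trigger the interrupt when requested.
 */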
static void gfx_v12_1_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					  u64 seq, unsigned int flags)
{
	struct amdgpu_device *adev = ring->adev;

	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regCPC_INT_STATUS));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v12_1_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
				     uint32_t reg_val_offs)
{
	struct amdgpu_device *adev = ring->adev;

	reg = soc_v1_0_normalize_xcc_reg_offset(reg);

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
			  (5 << 8) |	/* dst: memory */
			  (1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
			  reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
			  reg_val_offs * 4));
}

static void gfx_v12_1_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg,
				     uint32_t val)
{
	uint32_t cmd = 0;

	reg = soc_v1_0_normalize_xcc_reg_offset(reg);

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_KIQ:
		cmd = (1 << 16); /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v12_1_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	gfx_v12_1_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
}

static void gfx_v12_1_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
						   uint32_t reg0, uint32_t reg1,
						   uint32_t ref, uint32_t mask)
{
	gfx_v12_1_wait_reg_mem(ring, 0, 0, 1, reg0, reg1,
			       ref, mask, 0x20);
}

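/*
 * Enable or disable the EOP time-stamp/generic0 interrupts for a single
 * MEC1 pipe on the given XCC.  Only the first MEC is driver-managed; see
 * the comment in the function body.
 */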
static void gfx_v12_1_xcc_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
							   int me, int pipe,
							   enum amdgpu_interrupt_state state,
							   int xcc_id)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
							    regCP_ME1_PIPE0_INT_CNTL);
			break;
		case 1:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
							    regCP_ME1_PIPE1_INT_CNTL);
			break;
		case 2:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
							    regCP_ME1_PIPE2_INT_CNTL);
			break;
		case 3:
			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id),
							    regCP_ME1_PIPE3_INT_CNTL);
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     GENERIC0_INT_ENABLE, 0);
		WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     GENERIC0_INT_ENABLE, 1);
		WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id);
		break;
	default:
		break;
	}
}

static int gfx_v12_1_set_eop_interrupt_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *src,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	int i, num_xcc;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (i = 0; i < num_xcc; i++) {
		switch (type) {
		case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
			gfx_v12_1_xcc_set_compute_eop_interrupt_state(
				adev, 1, 0, state, i);
			break;
		case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
			gfx_v12_1_xcc_set_compute_eop_interrupt_state(
				adev, 1, 1, state, i);
			break;
		case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
			gfx_v12_1_xcc_set_compute_eop_interrupt_state(
				adev, 1, 2, state, i);
			break;
		case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
			gfx_v12_1_xcc_set_compute_eop_interrupt_state(
				adev, 1, 3, state, i);
			break;
		default:
			break;
		}
	}

	return 0;
}

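/*
 * EOP interrupt handler.  User-queue fences (MES enabled and a doorbell
 * offset present in the IV entry) are forwarded to the user-queue fence
 * code; otherwise the me/pipe/queue ids from the entry are matched against
 * the kernel compute rings of the signalling XCC and their fences processed.
 */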
static int gfx_v12_1_eop_irq(struct amdgpu_device *adev,
			     struct amdgpu_irq_src *source,
			     struct amdgpu_iv_entry *entry)
{
	u32 doorbell_offset = entry->src_data[0];
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i, xcc_id;

	DRM_DEBUG("IH: CP EOP\n");

	if (adev->enable_mes && doorbell_offset) {
		amdgpu_userq_process_fence_irq(adev, doorbell_offset);
	} else {
		me_id = (entry->ring_id & 0x0c) >> 2;
		pipe_id = (entry->ring_id & 0x03) >> 0;
		queue_id = (entry->ring_id & 0x70) >> 4;
		xcc_id = gfx_v12_1_ih_to_xcc_inst(adev, entry->node_id);

		if (xcc_id == -EINVAL)
			return -EINVAL;

		switch (me_id) {
		case 1:
		case 2:
			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
				ring = &adev->gfx.compute_ring
					[i + xcc_id * adev->gfx.num_compute_rings];
				/* Per-queue interrupt is supported for MEC starting from VI.
				 * The interrupt can only be enabled/disabled per pipe instead
				 * of per queue.
				 */
				if ((ring->me == me_id) &&
				    (ring->pipe == pipe_id) &&
				    (ring->queue == queue_id))
					amdgpu_fence_process(ring);
			}
			break;
		default:
			dev_dbg(adev->dev, "Unexpected me %d in eop_irq\n", me_id);
			break;
		}
	}

	return 0;
}

static int gfx_v12_1_set_priv_reg_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	int i, num_xcc;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < num_xcc; i++)
			WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
					      PRIV_REG_INT_ENABLE,
					      state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v12_1_set_priv_inst_fault_state(struct amdgpu_device *adev,
					       struct amdgpu_irq_src *source,
					       unsigned type,
					       enum amdgpu_interrupt_state state)
{
	int i, num_xcc;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
	case AMDGPU_IRQ_STATE_ENABLE:
		for (i = 0; i < num_xcc; i++)
			WREG32_FIELD15_PREREG(GC, GET_INST(GC, i), CP_INT_CNTL_RING0,
					      PRIV_INSTR_INT_ENABLE,
					      state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
		break;
	default:
		break;
	}

	return 0;
}

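/*
 * Common handler for privileged register/instruction faults: locate the
 * kernel compute ring matching the faulting me/pipe/queue and report a
 * scheduler fault on it (skipped when kernel queues are disabled).
 */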
static void gfx_v12_1_handle_priv_fault(struct amdgpu_device *adev,
					struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i, xcc_id;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	xcc_id = gfx_v12_1_ih_to_xcc_inst(adev, entry->node_id);

	if (xcc_id == -EINVAL)
		return;

	if (!adev->gfx.disable_kq) {
		switch (me_id) {
		case 1:
		case 2:
			for (i = 0; i < adev->gfx.num_compute_rings; i++) {
				ring = &adev->gfx.compute_ring
					[i + xcc_id * adev->gfx.num_compute_rings];
				if (ring->me == me_id && ring->pipe == pipe_id &&
				    ring->queue == queue_id)
					drm_sched_fault(&ring->sched);
			}
			break;
		default:
			dev_dbg(adev->dev, "Unexpected me %d in priv_fault\n", me_id);
			break;
		}
	}
}

static int gfx_v12_1_priv_reg_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v12_1_handle_priv_fault(adev, entry);
	return 0;
}

static int gfx_v12_1_priv_inst_irq(struct amdgpu_device *adev,
				   struct amdgpu_irq_src *source,
				   struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v12_1_handle_priv_fault(adev, entry);
	return 0;
}

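/*
 * RLC FED (fatal error detected) interrupt.  The per-XCC FED status
 * registers are OR-ed together to decide whether the poison originated in
 * GFX or SDMA, and the event is handed to the RAS manager with a mode-2
 * reset request.
 */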
static int gfx_v12_1_rlc_poison_irq(struct amdgpu_device *adev,
				    struct amdgpu_irq_src *source,
				    struct amdgpu_iv_entry *entry)
{
	uint32_t rlc_fed_status = 0;
	uint32_t ras_blk = RAS_BLOCK_ID__GFX;
	struct ras_ih_info ih_info = {0};
	int i, num_xcc;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (i = 0; i < num_xcc; i++)
		rlc_fed_status |= RREG32(SOC15_REG_OFFSET(GC,
					 GET_INST(GC, i), regRLC_RLCS_FED_STATUS));

	if (!rlc_fed_status)
		return 0;

	if (REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA0_FED_ERR) ||
	    REG_GET_FIELD(rlc_fed_status, RLC_RLCS_FED_STATUS, SDMA1_FED_ERR))
		ras_blk = RAS_BLOCK_ID__SDMA;

	dev_warn(adev->dev, "RLC FED IRQ for RAS block %d\n", ras_blk);

	ih_info.block = ras_blk;
	ih_info.reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
	amdgpu_ras_mgr_dispatch_interrupt(adev, &ih_info);
	return 0;
}

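/*
 * Emit an ACQUIRE_MEM that invalidates/writes back the GL2, GLV, GLK and
 * GLI caches over the full address range; used as the ring's mem_sync hook.
 */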
static void gfx_v12_1_emit_mem_sync(struct amdgpu_ring *ring)
{
	const unsigned int gcr_cntl =
		PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_INV(1) |
		PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_WB(1) |
		PACKET3_ACQUIRE_MEM_GCR_CNTL_GLV_INV(1) |
		PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
		PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1) |
		PACKET3_ACQUIRE_MEM_GCR_CNTL_GL2_SCOPE(2);

	/* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
	amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}

static const struct amd_ip_funcs gfx_v12_1_ip_funcs = {
	.name = "gfx_v12_1",
	.early_init = gfx_v12_1_early_init,
	.late_init = gfx_v12_1_late_init,
	.sw_init = gfx_v12_1_sw_init,
	.sw_fini = gfx_v12_1_sw_fini,
	.hw_init = gfx_v12_1_hw_init,
	.hw_fini = gfx_v12_1_hw_fini,
	.suspend = gfx_v12_1_suspend,
	.resume = gfx_v12_1_resume,
	.is_idle = gfx_v12_1_is_idle,
	.wait_for_idle = gfx_v12_1_wait_for_idle,
	.set_clockgating_state = gfx_v12_1_set_clockgating_state,
	.set_powergating_state = gfx_v12_1_set_powergating_state,
	.get_clockgating_state = gfx_v12_1_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v12_1_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v12_1_ring_get_rptr_compute,
	.get_wptr = gfx_v12_1_ring_get_wptr_compute,
	.set_wptr = gfx_v12_1_ring_set_wptr_compute,
	.emit_frame_size =
		7 + /* gfx_v12_1_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v12_1_ring_emit_vm_flush */
		8 + 8 + 8 + /* gfx_v12_1_ring_emit_fence x3 for user fence, vm fence */
		8, /* gfx_v12_1_emit_mem_sync */
	.emit_ib_size = 7, /* gfx_v12_1_ring_emit_ib_compute */
	.emit_ib = gfx_v12_1_ring_emit_ib_compute,
	.emit_fence = gfx_v12_1_ring_emit_fence,
	.emit_pipeline_sync = gfx_v12_1_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v12_1_ring_emit_vm_flush,
	.test_ring = gfx_v12_1_ring_test_ring,
	.test_ib = gfx_v12_1_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v12_1_ring_emit_wreg,
	.emit_reg_wait = gfx_v12_1_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v12_1_ring_emit_reg_write_reg_wait,
	.emit_mem_sync = gfx_v12_1_emit_mem_sync,
};

static const struct amdgpu_ring_funcs gfx_v12_1_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.get_rptr = gfx_v12_1_ring_get_rptr_compute,
	.get_wptr = gfx_v12_1_ring_get_wptr_compute,
	.set_wptr = gfx_v12_1_ring_set_wptr_compute,
	.emit_frame_size =
		7 + /* gfx_v12_1_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* gfx_v12_1_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v12_1_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v12_1_ring_emit_ib_compute */
	.emit_ib = gfx_v12_1_ring_emit_ib_compute,
	.emit_fence = gfx_v12_1_ring_emit_fence_kiq,
	.test_ring = gfx_v12_1_ring_test_ring,
	.test_ib = gfx_v12_1_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v12_1_ring_emit_rreg,
	.emit_wreg = gfx_v12_1_ring_emit_wreg,
	.emit_reg_wait = gfx_v12_1_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v12_1_ring_emit_reg_write_reg_wait,
};

static void gfx_v12_1_set_ring_funcs(struct amdgpu_device *adev)
{
	int i, j, num_xcc;

	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
	for (i = 0; i < num_xcc; i++) {
		adev->gfx.kiq[i].ring.funcs = &gfx_v12_1_ring_funcs_kiq;

		for (j = 0; j < adev->gfx.num_compute_rings; j++)
			adev->gfx.compute_ring[j + i * adev->gfx.num_compute_rings].funcs =
				&gfx_v12_1_ring_funcs_compute;
	}
}

static const struct amdgpu_irq_src_funcs gfx_v12_1_eop_irq_funcs = {
	.set = gfx_v12_1_set_eop_interrupt_state,
	.process = gfx_v12_1_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_reg_irq_funcs = {
	.set = gfx_v12_1_set_priv_reg_fault_state,
	.process = gfx_v12_1_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v12_1_priv_inst_irq_funcs = {
	.set = gfx_v12_1_set_priv_inst_fault_state,
	.process = gfx_v12_1_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v12_1_rlc_poison_irq_funcs = {
	.process = gfx_v12_1_rlc_poison_irq,
};

static void gfx_v12_1_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v12_1_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v12_1_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v12_1_priv_inst_irq_funcs;

	adev->gfx.rlc_poison_irq.num_types = 1;
	adev->gfx.rlc_poison_irq.funcs = &gfx_v12_1_rlc_poison_irq_funcs;
}

static void gfx_v12_1_set_imu_funcs(struct amdgpu_device *adev)
{
	if (adev->flags & AMD_IS_APU)
		adev->gfx.imu.mode = MISSION_MODE;
	else
		adev->gfx.imu.mode = DEBUG_MODE;

	if (!amdgpu_sriov_vf(adev))
		adev->gfx.imu.funcs = &gfx_v12_1_imu_funcs;
}

static void gfx_v12_1_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &gfx_v12_1_rlc_funcs;
}

static void gfx_v12_1_set_mqd_funcs(struct amdgpu_device *adev)
{
	/* set compute eng mqd */
	adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size =
		sizeof(struct v12_1_compute_mqd);
	adev->mqds[AMDGPU_HW_IP_COMPUTE].init_mqd =
		gfx_v12_1_compute_mqd_init;
}

static void gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(struct amdgpu_device *adev,
							 u32 bitmap, int xcc_id)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;

	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v12_1_get_cu_active_bitmap_per_sh(struct amdgpu_device *adev,
						 int xcc_id)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_WGPS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

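/*
 * Build the active-CU bitmap and count for every SE/SA on every XCC,
 * honoring the user-requested CU disable masks parsed by
 * amdgpu_gfx_parse_disable_cu().
 */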
static int gfx_v12_1_get_cu_info(struct amdgpu_device *adev,
				 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, xcc_id, active_cu_number = 0;
	u32 mask, bitmap;
	unsigned int disable_masks[2 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	if (adev->gfx.config.max_shader_engines > 2 ||
	    adev->gfx.config.max_sh_per_se > 2) {
		dev_err(adev->dev,
			"Max SE (%d) and Max SA per SE (%d) is greater than expected\n",
			adev->gfx.config.max_shader_engines,
			adev->gfx.config.max_sh_per_se);
		return -EINVAL;
	}

	amdgpu_gfx_parse_disable_cu(adev, disable_masks,
				    adev->gfx.config.max_shader_engines,
				    adev->gfx.config.max_sh_per_se);

	mutex_lock(&adev->grbm_idx_mutex);
	for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) {
		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
				bitmap = i * adev->gfx.config.max_sh_per_se + j;
				if (!((gfx_v12_1_get_sa_active_bitmap(adev, xcc_id) >> bitmap) & 1))
					continue;
				mask = 1;
				counter = 0;
				gfx_v12_1_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
				gfx_v12_1_set_user_cu_inactive_bitmap_per_sh(
					adev,
					disable_masks[i * adev->gfx.config.max_sh_per_se + j],
					xcc_id);
				bitmap = gfx_v12_1_get_cu_active_bitmap_per_sh(adev, xcc_id);

				cu_info->bitmap[xcc_id][i][j] = bitmap;

				for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
					if (bitmap & mask)
						counter++;

					mask <<= 1;
				}
				active_cu_number += counter;
			}
		}
		gfx_v12_1_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id);
	}
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU_GFX12_1;
	cu_info->lds_size = 320;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v12_1_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 12,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v12_1_ip_funcs,
};

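/*
 * Per-partition (XCP) resume: re-initialize constants and, on bare metal,
 * the RLC for every XCC in inst_mask, then restart the compute CP on those
 * instances.
 */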
static int gfx_v12_1_xcp_resume(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	uint32_t tmp_mask;
	int i, r;

	/* TODO: initialize golden regs */
	/* gfx_v12_1_init_golden_registers(adev); */

	tmp_mask = inst_mask;
	for_each_inst(i, tmp_mask)
		gfx_v12_1_xcc_constants_init(adev, i);

	if (!amdgpu_sriov_vf(adev)) {
		tmp_mask = inst_mask;
		for_each_inst(i, tmp_mask) {
			r = gfx_v12_1_xcc_rlc_resume(adev, i);
			if (r)
				return r;
		}
	}

	r = gfx_v12_1_xcc_cp_resume(adev, inst_mask);

	return r;
}

static int gfx_v12_1_xcp_suspend(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for_each_inst(i, inst_mask)
		gfx_v12_1_xcc_fini(adev, i);

	return 0;
}

struct amdgpu_xcp_ip_funcs gfx_v12_1_xcp_funcs = {
	.suspend = &gfx_v12_1_xcp_suspend,
	.resume = &gfx_v12_1_xcp_resume
};