xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision 99a97a8ba9881fc47901ff36b057e5cd0bf06af0)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "soc15.h"
28 #include "soc15d.h"
29 
30 #include "vega10/soc15ip.h"
31 #include "vega10/GC/gc_9_0_offset.h"
32 #include "vega10/GC/gc_9_0_sh_mask.h"
33 #include "vega10/vega10_enum.h"
34 #include "vega10/HDP/hdp_4_0_offset.h"
35 
36 #include "soc15_common.h"
37 #include "clearstate_gfx9.h"
38 #include "v9_structs.h"
39 
40 #define GFX9_NUM_GFX_RINGS     1
41 #define GFX9_NUM_COMPUTE_RINGS 8
42 #define GFX9_NUM_SE		4
43 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
44 
45 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
46 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
47 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
48 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
49 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
50 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
51 
52 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
53 {
54 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
55 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)},
56 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE),
57 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)},
58 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE),
59 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)},
60 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE),
61 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)},
62 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE),
63 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID4), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID4)},
64 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID5_SIZE),
65 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID5), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID5)},
66 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID6_SIZE),
67 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID6), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID6)},
68 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID7_SIZE),
69 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID7), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID7)},
70 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID8_SIZE),
71 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID8), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID8)},
72 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID9_SIZE),
73 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID9), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID9)},
74 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID10_SIZE),
75 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID10), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID10)},
76 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID11_SIZE),
77 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID11), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID11)},
78 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID12_SIZE),
79 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID12), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID12)},
80 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID13_SIZE),
81 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID13), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID13)},
82 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID14_SIZE),
83 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID14), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID14)},
84 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID15_SIZE),
85 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID15), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID15)}
86 };
87 
88 static const u32 golden_settings_gc_9_0[] =
89 {
90 	SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400,
91 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024,
92 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001,
93 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000,
94 	SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000,
95 	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68,
96 	SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197,
97 	SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff
98 };
99 
100 static const u32 golden_settings_gc_9_0_vg10[] =
101 {
102 	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0x0000f000, 0x00012107,
103 	SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000,
104 	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x2a114042,
105 	SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042,
106 	SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000,
107 	SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000,
108 	SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800,
109 	SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x00000007
110 };
111 
112 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
113 
114 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
115 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
116 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
117 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
118 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
119                                  struct amdgpu_cu_info *cu_info);
120 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
121 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
122 
123 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
124 {
125 	switch (adev->asic_type) {
126 	case CHIP_VEGA10:
127 		amdgpu_program_register_sequence(adev,
128 						 golden_settings_gc_9_0,
129 						 (const u32)ARRAY_SIZE(golden_settings_gc_9_0));
130 		amdgpu_program_register_sequence(adev,
131 						 golden_settings_gc_9_0_vg10,
132 						 (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10));
133 		break;
134 	default:
135 		break;
136 	}
137 }
138 
139 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
140 {
141 	adev->gfx.scratch.num_reg = 7;
142 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
143 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
144 }
145 
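/* Helper: emit a PACKET3 WRITE_DATA that writes @val to the register at
 * offset @reg on the selected engine, optionally requesting write confirmation.
 */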
146 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
147 				       bool wc, uint32_t reg, uint32_t val)
148 {
149 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
150 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
151 				WRITE_DATA_DST_SEL(0) |
152 				(wc ? WR_CONFIRM : 0));
153 	amdgpu_ring_write(ring, reg);
154 	amdgpu_ring_write(ring, 0);
155 	amdgpu_ring_write(ring, val);
156 }
157 
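/* Helper: emit a PACKET3 WAIT_REG_MEM that polls a register or memory
 * location (mem_space) until (value & mask) == ref, using the given engine
 * and poll interval.
 */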
158 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
159 				  int mem_space, int opt, uint32_t addr0,
160 				  uint32_t addr1, uint32_t ref, uint32_t mask,
161 				  uint32_t inv)
162 {
163 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
164 	amdgpu_ring_write(ring,
165 				 /* memory (1) or register (0) */
166 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
167 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
168 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
169 				 WAIT_REG_MEM_ENGINE(eng_sel)));
170 
171 	if (mem_space)
172 		BUG_ON(addr0 & 0x3); /* Dword align */
173 	amdgpu_ring_write(ring, addr0);
174 	amdgpu_ring_write(ring, addr1);
175 	amdgpu_ring_write(ring, ref);
176 	amdgpu_ring_write(ring, mask);
177 	amdgpu_ring_write(ring, inv); /* poll interval */
178 }
179 
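/* Simple CP sanity test: write a magic value to a scratch register through
 * the ring and poll until it reads back, or time out.
 */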
180 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
181 {
182 	struct amdgpu_device *adev = ring->adev;
183 	uint32_t scratch;
184 	uint32_t tmp = 0;
185 	unsigned i;
186 	int r;
187 
188 	r = amdgpu_gfx_scratch_get(adev, &scratch);
189 	if (r) {
190 		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
191 		return r;
192 	}
193 	WREG32(scratch, 0xCAFEDEAD);
194 	r = amdgpu_ring_alloc(ring, 3);
195 	if (r) {
196 		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
197 			  ring->idx, r);
198 		amdgpu_gfx_scratch_free(adev, scratch);
199 		return r;
200 	}
201 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
202 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
203 	amdgpu_ring_write(ring, 0xDEADBEEF);
204 	amdgpu_ring_commit(ring);
205 
206 	for (i = 0; i < adev->usec_timeout; i++) {
207 		tmp = RREG32(scratch);
208 		if (tmp == 0xDEADBEEF)
209 			break;
210 		DRM_UDELAY(1);
211 	}
212 	if (i < adev->usec_timeout) {
213 		DRM_INFO("ring test on %d succeeded in %d usecs\n",
214 			 ring->idx, i);
215 	} else {
216 		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
217 			  ring->idx, scratch, tmp);
218 		r = -EINVAL;
219 	}
220 	amdgpu_gfx_scratch_free(adev, scratch);
221 	return r;
222 }
223 
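/* Indirect buffer test: submit a small IB that writes a magic value to a
 * scratch register and wait on its fence to confirm IB execution works.
 */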
224 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
225 {
226 	struct amdgpu_device *adev = ring->adev;
227 	struct amdgpu_ib ib;
228 	struct dma_fence *f = NULL;
229 	uint32_t scratch;
230 	uint32_t tmp = 0;
231 	long r;
232 
233 	r = amdgpu_gfx_scratch_get(adev, &scratch);
234 	if (r) {
235 		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
236 		return r;
237 	}
238 	WREG32(scratch, 0xCAFEDEAD);
239 	memset(&ib, 0, sizeof(ib));
240 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
241 	if (r) {
242 		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
243 		goto err1;
244 	}
245 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
246 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
247 	ib.ptr[2] = 0xDEADBEEF;
248 	ib.length_dw = 3;
249 
250 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
251 	if (r)
252 		goto err2;
253 
254 	r = dma_fence_wait_timeout(f, false, timeout);
255 	if (r == 0) {
256 		DRM_ERROR("amdgpu: IB test timed out.\n");
257 		r = -ETIMEDOUT;
258 		goto err2;
259 	} else if (r < 0) {
260 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
261 		goto err2;
262 	}
263 	tmp = RREG32(scratch);
264 	if (tmp == 0xDEADBEEF) {
265 		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
266 		r = 0;
267 	} else {
268 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
269 			  scratch, tmp);
270 		r = -EINVAL;
271 	}
272 err2:
273 	amdgpu_ib_free(adev, &ib, NULL);
274 	dma_fence_put(f);
275 err1:
276 	amdgpu_gfx_scratch_free(adev, scratch);
277 	return r;
278 }
279 
280 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
281 {
282 	const char *chip_name;
283 	char fw_name[30];
284 	int err;
285 	struct amdgpu_firmware_info *info = NULL;
286 	const struct common_firmware_header *header = NULL;
287 	const struct gfx_firmware_header_v1_0 *cp_hdr;
288 
289 	DRM_DEBUG("\n");
290 
291 	switch (adev->asic_type) {
292 	case CHIP_VEGA10:
293 		chip_name = "vega10";
294 		break;
295 	default:
296 		BUG();
297 	}
298 
299 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
300 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
301 	if (err)
302 		goto out;
303 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
304 	if (err)
305 		goto out;
306 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
307 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
308 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
309 
310 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
311 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
312 	if (err)
313 		goto out;
314 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
315 	if (err)
316 		goto out;
317 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
318 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
319 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
320 
321 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
322 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
323 	if (err)
324 		goto out;
325 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
326 	if (err)
327 		goto out;
328 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
329 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
330 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
331 
332 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
333 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
334 	if (err)
335 		goto out;
336 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	if (err)
		goto out;
337 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
338 	adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
339 	adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
340 
341 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
342 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
343 	if (err)
344 		goto out;
345 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
346 	if (err)
347 		goto out;
348 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
349 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
350 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
351 
352 
353 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
354 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
355 	if (!err) {
356 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
357 		if (err)
358 			goto out;
359 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
360 		adev->gfx.mec2_fw->data;
361 		adev->gfx.mec2_fw_version =
362 		le32_to_cpu(cp_hdr->header.ucode_version);
363 		adev->gfx.mec2_feature_version =
364 		le32_to_cpu(cp_hdr->ucode_feature_version);
365 	} else {
366 		err = 0;
367 		adev->gfx.mec2_fw = NULL;
368 	}
369 
370 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
371 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
372 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
373 		info->fw = adev->gfx.pfp_fw;
374 		header = (const struct common_firmware_header *)info->fw->data;
375 		adev->firmware.fw_size +=
376 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
377 
378 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
379 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
380 		info->fw = adev->gfx.me_fw;
381 		header = (const struct common_firmware_header *)info->fw->data;
382 		adev->firmware.fw_size +=
383 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
384 
385 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
386 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
387 		info->fw = adev->gfx.ce_fw;
388 		header = (const struct common_firmware_header *)info->fw->data;
389 		adev->firmware.fw_size +=
390 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
391 
392 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
393 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
394 		info->fw = adev->gfx.rlc_fw;
395 		header = (const struct common_firmware_header *)info->fw->data;
396 		adev->firmware.fw_size +=
397 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
398 
399 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
400 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
401 		info->fw = adev->gfx.mec_fw;
402 		header = (const struct common_firmware_header *)info->fw->data;
403 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
404 		adev->firmware.fw_size +=
405 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
406 
407 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
408 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
409 		info->fw = adev->gfx.mec_fw;
410 		adev->firmware.fw_size +=
411 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
412 
413 		if (adev->gfx.mec2_fw) {
414 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
415 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
416 			info->fw = adev->gfx.mec2_fw;
417 			header = (const struct common_firmware_header *)info->fw->data;
418 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
419 			adev->firmware.fw_size +=
420 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
421 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
422 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
423 			info->fw = adev->gfx.mec2_fw;
424 			adev->firmware.fw_size +=
425 				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
426 		}
427 
428 	}
429 
430 out:
431 	if (err) {
432 		dev_err(adev->dev,
433 			"gfx9: Failed to load firmware \"%s\"\n",
434 			fw_name);
435 		release_firmware(adev->gfx.pfp_fw);
436 		adev->gfx.pfp_fw = NULL;
437 		release_firmware(adev->gfx.me_fw);
438 		adev->gfx.me_fw = NULL;
439 		release_firmware(adev->gfx.ce_fw);
440 		adev->gfx.ce_fw = NULL;
441 		release_firmware(adev->gfx.rlc_fw);
442 		adev->gfx.rlc_fw = NULL;
443 		release_firmware(adev->gfx.mec_fw);
444 		adev->gfx.mec_fw = NULL;
445 		release_firmware(adev->gfx.mec2_fw);
446 		adev->gfx.mec2_fw = NULL;
447 	}
448 	return err;
449 }
450 
451 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
452 {
453 	int r;
454 
455 	if (adev->gfx.mec.hpd_eop_obj) {
456 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
457 		if (unlikely(r != 0))
458 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
459 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
460 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
461 
462 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
463 		adev->gfx.mec.hpd_eop_obj = NULL;
464 	}
465 	if (adev->gfx.mec.mec_fw_obj) {
466 		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
467 		if (unlikely(r != 0))
468 			dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
469 		amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
470 		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
471 
472 		amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj);
473 		adev->gfx.mec.mec_fw_obj = NULL;
474 	}
475 }
476 
477 #define MEC_HPD_SIZE 2048
478 
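/* Allocate and clear the HPD EOP buffer for the compute queues, then copy
 * the MEC microcode into a GTT buffer object for the CP to load from.
 */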
479 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
480 {
481 	int r;
482 	u32 *hpd;
483 	const __le32 *fw_data;
484 	unsigned fw_size;
485 	u32 *fw;
486 
487 	const struct gfx_firmware_header_v1_0 *mec_hdr;
488 
489 	/*
490 	 * we assign only 1 pipe because all other pipes will
491 	 * be handled by KFD
492 	 */
493 	adev->gfx.mec.num_mec = 1;
494 	adev->gfx.mec.num_pipe = 1;
495 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
496 
497 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
498 		r = amdgpu_bo_create(adev,
499 				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
500 				     PAGE_SIZE, true,
501 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
502 				     &adev->gfx.mec.hpd_eop_obj);
503 		if (r) {
504 			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
505 			return r;
506 		}
507 	}
508 
509 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
510 	if (unlikely(r != 0)) {
511 		gfx_v9_0_mec_fini(adev);
512 		return r;
513 	}
514 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
515 			  &adev->gfx.mec.hpd_eop_gpu_addr);
516 	if (r) {
517 		dev_warn(adev->dev, "(%d) pin HPD EOP bo failed\n", r);
518 		gfx_v9_0_mec_fini(adev);
519 		return r;
520 	}
521 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
522 	if (r) {
523 		dev_warn(adev->dev, "(%d) map HPD EOP bo failed\n", r);
524 		gfx_v9_0_mec_fini(adev);
525 		return r;
526 	}
527 
528 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
529 
530 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
531 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
532 
533 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
534 
535 	fw_data = (const __le32 *)
536 		(adev->gfx.mec_fw->data +
537 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
538 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
539 
540 	if (adev->gfx.mec.mec_fw_obj == NULL) {
541 		r = amdgpu_bo_create(adev,
542 			mec_hdr->header.ucode_size_bytes,
543 			PAGE_SIZE, true,
544 			AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
545 			&adev->gfx.mec.mec_fw_obj);
546 		if (r) {
547 			dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
548 			return r;
549 		}
550 	}
551 
552 	r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
553 	if (unlikely(r != 0)) {
554 		gfx_v9_0_mec_fini(adev);
555 		return r;
556 	}
557 	r = amdgpu_bo_pin(adev->gfx.mec.mec_fw_obj, AMDGPU_GEM_DOMAIN_GTT,
558 			&adev->gfx.mec.mec_fw_gpu_addr);
559 	if (r) {
560 		dev_warn(adev->dev, "(%d) pin mec firmware bo failed\n", r);
561 		gfx_v9_0_mec_fini(adev);
562 		return r;
563 	}
564 	r = amdgpu_bo_kmap(adev->gfx.mec.mec_fw_obj, (void **)&fw);
565 	if (r) {
566 		dev_warn(adev->dev, "(%d) map firmware bo failed\n", r);
567 		gfx_v9_0_mec_fini(adev);
568 		return r;
569 	}
570 	memcpy(fw, fw_data, fw_size);
571 
572 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
573 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
574 
575 
576 	return 0;
577 }
578 
579 static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev)
580 {
581 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
582 
583 	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
584 }
585 
586 static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
587 {
588 	int r;
589 	u32 *hpd;
590 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
591 
592 	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
593 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
594 				    &kiq->eop_gpu_addr, (void **)&hpd);
595 	if (r) {
596 		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
597 		return r;
598 	}
599 
600 	memset(hpd, 0, MEC_HPD_SIZE);
601 
602 	r = amdgpu_bo_reserve(kiq->eop_obj, false);
603 	if (unlikely(r != 0))
604 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
605 	amdgpu_bo_kunmap(kiq->eop_obj);
606 	amdgpu_bo_unreserve(kiq->eop_obj);
607 
608 	return 0;
609 }
610 
611 static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
612 				  struct amdgpu_ring *ring,
613 				  struct amdgpu_irq_src *irq)
614 {
615 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
616 	int r = 0;
617 
618 	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
619 	if (r)
620 		return r;
621 
622 	ring->adev = NULL;
623 	ring->ring_obj = NULL;
624 	ring->use_doorbell = true;
625 	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
626 	if (adev->gfx.mec2_fw) {
627 		ring->me = 2;
628 		ring->pipe = 0;
629 	} else {
630 		ring->me = 1;
631 		ring->pipe = 1;
632 	}
633 
634 	irq->data = ring;
635 	ring->queue = 0;
636 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
637 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
638 	r = amdgpu_ring_init(adev, ring, 1024,
639 			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
640 	if (r)
641 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
642 
643 	return r;
644 }
645 static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
646 				   struct amdgpu_irq_src *irq)
647 {
648 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
649 	amdgpu_ring_fini(ring);
650 	irq->data = NULL;
651 }
652 
653 /* create MQD for each compute queue */
654 static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev)
655 {
656 	struct amdgpu_ring *ring = NULL;
657 	int r, i;
658 
659 	/* create MQD for KIQ */
660 	ring = &adev->gfx.kiq.ring;
661 	if (!ring->mqd_obj) {
662 		r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
663 					    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
664 					    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
665 		if (r) {
666 			dev_warn(adev->dev, "failed to create ring mqd bo (%d)\n", r);
667 			return r;
668 		}
669 
670 		/*TODO: prepare MQD backup */
671 	}
672 
673 	/* create MQD for each KCQ */
674 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
675 		ring = &adev->gfx.compute_ring[i];
676 		if (!ring->mqd_obj) {
677 			r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE,
678 						    AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
679 						    &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
680 			if (r) {
681 				dev_warn(adev->dev, "failed to create ring mqd bo (%d)\n", r);
682 				return r;
683 			}
684 
685 			/* TODO: prepare MQD backup */
686 		}
687 	}
688 
689 	return 0;
690 }
691 
692 static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev)
693 {
694 	struct amdgpu_ring *ring = NULL;
695 	int i;
696 
697 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
698 		ring = &adev->gfx.compute_ring[i];
699 		amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
700 	}
701 
702 	ring = &adev->gfx.kiq.ring;
703 	amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr);
704 }
705 
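/* Read a per-wave register through the SQ indirect index/data register pair. */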
706 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
707 {
708 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX),
709 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
710 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
711 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
712 		(SQ_IND_INDEX__FORCE_READ_MASK));
713 	return RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA));
714 }
715 
716 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
717 			   uint32_t wave, uint32_t thread,
718 			   uint32_t regno, uint32_t num, uint32_t *out)
719 {
720 	WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_INDEX),
721 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
722 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
723 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
724 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
725 		(SQ_IND_INDEX__FORCE_READ_MASK) |
726 		(SQ_IND_INDEX__AUTO_INCR_MASK));
727 	while (num--)
728 		*(out++) = RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_IND_DATA));
729 }
730 
731 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
732 {
733 	/* type 1 wave data */
734 	dst[(*no_fields)++] = 1;
735 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
736 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
737 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
738 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
739 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
740 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
741 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
742 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
743 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
744 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
745 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
746 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
747 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
748 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
749 }
750 
751 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
752 				     uint32_t wave, uint32_t start,
753 				     uint32_t size, uint32_t *dst)
754 {
755 	wave_read_regs(
756 		adev, simd, wave, 0,
757 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
758 }
759 
760 
761 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
762 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
763 	.select_se_sh = &gfx_v9_0_select_se_sh,
764 	.read_wave_data = &gfx_v9_0_read_wave_data,
765 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
766 };
767 
768 static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
769 {
770 	u32 gb_addr_config;
771 
772 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
773 
774 	switch (adev->asic_type) {
775 	case CHIP_VEGA10:
776 		adev->gfx.config.max_shader_engines = 4;
777 		adev->gfx.config.max_tile_pipes = 8; //??
778 		adev->gfx.config.max_cu_per_sh = 16;
779 		adev->gfx.config.max_sh_per_se = 1;
780 		adev->gfx.config.max_backends_per_se = 4;
781 		adev->gfx.config.max_texture_channel_caches = 16;
782 		adev->gfx.config.max_gprs = 256;
783 		adev->gfx.config.max_gs_threads = 32;
784 		adev->gfx.config.max_hw_contexts = 8;
785 
786 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
787 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
788 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
789 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
790 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
791 		break;
792 	default:
793 		BUG();
794 		break;
795 	}
796 
797 	adev->gfx.config.gb_addr_config = gb_addr_config;
798 
799 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
800 			REG_GET_FIELD(
801 					adev->gfx.config.gb_addr_config,
802 					GB_ADDR_CONFIG,
803 					NUM_PIPES);
804 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
805 			REG_GET_FIELD(
806 					adev->gfx.config.gb_addr_config,
807 					GB_ADDR_CONFIG,
808 					NUM_BANKS);
809 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
810 			REG_GET_FIELD(
811 					adev->gfx.config.gb_addr_config,
812 					GB_ADDR_CONFIG,
813 					MAX_COMPRESSED_FRAGS);
814 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
815 			REG_GET_FIELD(
816 					adev->gfx.config.gb_addr_config,
817 					GB_ADDR_CONFIG,
818 					NUM_RB_PER_SE);
819 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
820 			REG_GET_FIELD(
821 					adev->gfx.config.gb_addr_config,
822 					GB_ADDR_CONFIG,
823 					NUM_SHADER_ENGINES);
824 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
825 			REG_GET_FIELD(
826 					adev->gfx.config.gb_addr_config,
827 					GB_ADDR_CONFIG,
828 					PIPE_INTERLEAVE_SIZE));
829 }
830 
831 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
832 				   struct amdgpu_ngg_buf *ngg_buf,
833 				   int size_se,
834 				   int default_size_se)
835 {
836 	int r;
837 
838 	if (size_se < 0) {
839 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
840 		return -EINVAL;
841 	}
842 	size_se = size_se ? size_se : default_size_se;
843 
844 	ngg_buf->size = size_se * GFX9_NUM_SE;
845 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
846 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
847 				    &ngg_buf->bo,
848 				    &ngg_buf->gpu_addr,
849 				    NULL);
850 	if (r) {
851 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
852 		return r;
853 	}
854 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
855 
856 	return r;
857 }
858 
859 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
860 {
861 	int i;
862 
863 	for (i = 0; i < NGG_BUF_MAX; i++)
864 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
865 				      &adev->gfx.ngg.buf[i].gpu_addr,
866 				      NULL);
867 
868 	memset(&adev->gfx.ngg.buf[0], 0,
869 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
870 
871 	adev->gfx.ngg.init = false;
872 
873 	return 0;
874 }
875 
876 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
877 {
878 	int r;
879 
880 	if (!amdgpu_ngg || adev->gfx.ngg.init)
881 		return 0;
882 
883 	/* GDS reserve memory: 64 bytes alignment */
884 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
885 	adev->gds.mem.total_size -= adev->gfx.ngg.gds_reserve_size;
886 	adev->gds.mem.gfx_partition_size -= adev->gfx.ngg.gds_reserve_size;
887 	adev->gfx.ngg.gds_reserve_addr = amdgpu_gds_reg_offset[0].mem_base;
888 	adev->gfx.ngg.gds_reserve_addr += adev->gds.mem.gfx_partition_size;
889 
890 	/* Primitive Buffer */
891 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PRIM],
892 				    amdgpu_prim_buf_per_se,
893 				    64 * 1024);
894 	if (r) {
895 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
896 		goto err;
897 	}
898 
899 	/* Position Buffer */
900 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[POS],
901 				    amdgpu_pos_buf_per_se,
902 				    256 * 1024);
903 	if (r) {
904 		dev_err(adev->dev, "Failed to create Position Buffer\n");
905 		goto err;
906 	}
907 
908 	/* Control Sideband */
909 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[CNTL],
910 				    amdgpu_cntl_sb_buf_per_se,
911 				    256);
912 	if (r) {
913 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
914 		goto err;
915 	}
916 
917 	/* Parameter Cache, not created by default */
918 	if (amdgpu_param_buf_per_se <= 0)
919 		goto out;
920 
921 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[PARAM],
922 				    amdgpu_param_buf_per_se,
923 				    512 * 1024);
924 	if (r) {
925 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
926 		goto err;
927 	}
928 
929 out:
930 	adev->gfx.ngg.init = true;
931 	return 0;
932 err:
933 	gfx_v9_0_ngg_fini(adev);
934 	return r;
935 }
936 
937 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
938 {
939 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
940 	int r;
941 	u32 data;
942 	u32 size;
943 	u32 base;
944 
945 	if (!amdgpu_ngg)
946 		return 0;
947 
948 	/* Program buffer size */
949 	data = 0;
950 	size = adev->gfx.ngg.buf[PRIM].size / 256;
951 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE, size);
952 
953 	size = adev->gfx.ngg.buf[POS].size / 256;
954 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE, size);
955 
956 	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_1), data);
957 
958 	data = 0;
959 	size = adev->gfx.ngg.buf[CNTL].size / 256;
960 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE, size);
961 
962 	size = adev->gfx.ngg.buf[PARAM].size / 1024;
963 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE, size);
964 
965 	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_BUF_RESOURCE_2), data);
966 
967 	/* Program buffer base address */
968 	base = lower_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
969 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
970 	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE), data);
971 
972 	base = upper_32_bits(adev->gfx.ngg.buf[PRIM].gpu_addr);
973 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
974 	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_INDEX_BUF_BASE_HI), data);
975 
976 	base = lower_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
977 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
978 	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE), data);
979 
980 	base = upper_32_bits(adev->gfx.ngg.buf[POS].gpu_addr);
981 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
982 	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_POS_BUF_BASE_HI), data);
983 
984 	base = lower_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
985 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
986 	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE), data);
987 
988 	base = upper_32_bits(adev->gfx.ngg.buf[CNTL].gpu_addr);
989 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
990 	WREG32(SOC15_REG_OFFSET(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI), data);
991 
992 	/* Clear GDS reserved memory */
993 	r = amdgpu_ring_alloc(ring, 17);
994 	if (r) {
995 		DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n",
996 			  ring->idx, r);
997 		return r;
998 	}
999 
1000 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1001 				   amdgpu_gds_reg_offset[0].mem_size,
1002 			           (adev->gds.mem.total_size +
1003 				    adev->gfx.ngg.gds_reserve_size) >>
1004 				   AMDGPU_GDS_SHIFT);
1005 
1006 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1007 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1008 				PACKET3_DMA_DATA_SRC_SEL(2)));
1009 	amdgpu_ring_write(ring, 0);
1010 	amdgpu_ring_write(ring, 0);
1011 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1012 	amdgpu_ring_write(ring, 0);
1013 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size);
1014 
1015 
1016 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1017 				   amdgpu_gds_reg_offset[0].mem_size, 0);
1018 
1019 	amdgpu_ring_commit(ring);
1020 
1021 	return 0;
1022 }
1023 
1024 static int gfx_v9_0_sw_init(void *handle)
1025 {
1026 	int i, r;
1027 	struct amdgpu_ring *ring;
1028 	struct amdgpu_kiq *kiq;
1029 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1030 
1031 	/* KIQ event */
1032 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
1033 	if (r)
1034 		return r;
1035 
1036 	/* EOP Event */
1037 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
1038 	if (r)
1039 		return r;
1040 
1041 	/* Privileged reg */
1042 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184,
1043 			      &adev->gfx.priv_reg_irq);
1044 	if (r)
1045 		return r;
1046 
1047 	/* Privileged inst */
1048 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185,
1049 			      &adev->gfx.priv_inst_irq);
1050 	if (r)
1051 		return r;
1052 
1053 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1054 
1055 	gfx_v9_0_scratch_init(adev);
1056 
1057 	r = gfx_v9_0_init_microcode(adev);
1058 	if (r) {
1059 		DRM_ERROR("Failed to load gfx firmware!\n");
1060 		return r;
1061 	}
1062 
1063 	r = gfx_v9_0_mec_init(adev);
1064 	if (r) {
1065 		DRM_ERROR("Failed to init MEC BOs!\n");
1066 		return r;
1067 	}
1068 
1069 	/* set up the gfx ring */
1070 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1071 		ring = &adev->gfx.gfx_ring[i];
1072 		ring->ring_obj = NULL;
1073 		sprintf(ring->name, "gfx");
1074 		ring->use_doorbell = true;
1075 		ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
1076 		r = amdgpu_ring_init(adev, ring, 1024,
1077 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1078 		if (r)
1079 			return r;
1080 	}
1081 
1082 	/* set up the compute queues */
1083 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1084 		unsigned irq_type;
1085 
1086 		/* max 32 queues per MEC */
1087 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1088 			DRM_ERROR("Too many (%d) compute rings!\n", i);
1089 			break;
1090 		}
1091 		ring = &adev->gfx.compute_ring[i];
1092 		ring->ring_obj = NULL;
1093 		ring->use_doorbell = true;
1094 		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1;
1095 		ring->me = 1; /* first MEC */
1096 		ring->pipe = i / 8;
1097 		ring->queue = i % 8;
1098 		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
1099 		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1100 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1101 		/* type-2 packets are deprecated on MEC, use type-3 instead */
1102 		r = amdgpu_ring_init(adev, ring, 1024,
1103 				     &adev->gfx.eop_irq, irq_type);
1104 		if (r)
1105 			return r;
1106 	}
1107 
1108 	if (amdgpu_sriov_vf(adev)) {
1109 		r = gfx_v9_0_kiq_init(adev);
1110 		if (r) {
1111 			DRM_ERROR("Failed to init KIQ BOs!\n");
1112 			return r;
1113 		}
1114 
1115 		kiq = &adev->gfx.kiq;
1116 		r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1117 		if (r)
1118 			return r;
1119 
1120 		/* create MQD for all compute queues as well as KIQ for SRIOV case */
1121 		r = gfx_v9_0_compute_mqd_sw_init(adev);
1122 		if (r)
1123 			return r;
1124 	}
1125 
1126 	/* reserve GDS, GWS and OA resource for gfx */
1127 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
1128 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
1129 				    &adev->gds.gds_gfx_bo, NULL, NULL);
1130 	if (r)
1131 		return r;
1132 
1133 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
1134 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
1135 				    &adev->gds.gws_gfx_bo, NULL, NULL);
1136 	if (r)
1137 		return r;
1138 
1139 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
1140 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
1141 				    &adev->gds.oa_gfx_bo, NULL, NULL);
1142 	if (r)
1143 		return r;
1144 
1145 	adev->gfx.ce_ram_size = 0x8000;
1146 
1147 	gfx_v9_0_gpu_early_init(adev);
1148 
1149 	r = gfx_v9_0_ngg_init(adev);
1150 	if (r)
1151 		return r;
1152 
1153 	return 0;
1154 }
1155 
1156 
1157 static int gfx_v9_0_sw_fini(void *handle)
1158 {
1159 	int i;
1160 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1161 
1162 	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
1163 	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
1164 	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
1165 
1166 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1167 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1168 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1169 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1170 
1171 	if (amdgpu_sriov_vf(adev)) {
1172 		gfx_v9_0_compute_mqd_sw_fini(adev);
1173 		gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1174 		gfx_v9_0_kiq_fini(adev);
1175 	}
1176 
1177 	gfx_v9_0_mec_fini(adev);
1178 	gfx_v9_0_ngg_fini(adev);
1179 
1180 	return 0;
1181 }
1182 
1183 
1184 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1185 {
1186 	/* TODO */
1187 }
1188 
1189 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1190 {
1191 	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1192 
1193 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
1194 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1195 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1196 	} else if (se_num == 0xffffffff) {
1197 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1198 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1199 	} else if (sh_num == 0xffffffff) {
1200 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1201 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1202 	} else {
1203 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1204 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1205 	}
1206 	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
1207 }
1208 
1209 static u32 gfx_v9_0_create_bitmask(u32 bit_width)
1210 {
1211 	return (u32)((1ULL << bit_width) - 1);
1212 }
1213 
1214 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1215 {
1216 	u32 data, mask;
1217 
1218 	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_RB_BACKEND_DISABLE));
1219 	data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_RB_BACKEND_DISABLE));
1220 
1221 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1222 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1223 
1224 	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se /
1225 				       adev->gfx.config.max_sh_per_se);
1226 
1227 	return (~data) & mask;
1228 }
1229 
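/* Walk every SE/SH, collect the active render backend bitmap and cache the
 * backend enable mask and RB count in the gfx config.
 */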
1230 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1231 {
1232 	int i, j;
1233 	u32 data;
1234 	u32 active_rbs = 0;
1235 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1236 					adev->gfx.config.max_sh_per_se;
1237 
1238 	mutex_lock(&adev->grbm_idx_mutex);
1239 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1240 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1241 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1242 			data = gfx_v9_0_get_rb_active_bitmap(adev);
1243 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1244 					       rb_bitmap_width_per_sh);
1245 		}
1246 	}
1247 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1248 	mutex_unlock(&adev->grbm_idx_mutex);
1249 
1250 	adev->gfx.config.backend_enable_mask = active_rbs;
1251 	adev->gfx.config.num_rbs = hweight32(active_rbs);
1252 }
1253 
1254 #define DEFAULT_SH_MEM_BASES	(0x6000)
1255 #define FIRST_COMPUTE_VMID	(8)
1256 #define LAST_COMPUTE_VMID	(16)
1257 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1258 {
1259 	int i;
1260 	uint32_t sh_mem_config;
1261 	uint32_t sh_mem_bases;
1262 
1263 	/*
1264 	 * Configure apertures:
1265 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1266 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1267 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1268 	 */
1269 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1270 
1271 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1272 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1273 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1274 
1275 	mutex_lock(&adev->srbm_mutex);
1276 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1277 		soc15_grbm_select(adev, 0, 0, 0, i);
1278 		/* CP and shaders */
1279 		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
1280 		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
1281 	}
1282 	soc15_grbm_select(adev, 0, 0, 0, 0);
1283 	mutex_unlock(&adev->srbm_mutex);
1284 }
1285 
1286 static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
1287 {
1288 	u32 tmp;
1289 	int i;
1290 
1291 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_CNTL));
1292 	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
1293 	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_CNTL), tmp);
1294 
1295 	gfx_v9_0_tiling_mode_table_init(adev);
1296 
1297 	gfx_v9_0_setup_rb(adev);
1298 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1299 
1300 	/* XXX SH_MEM regs */
1301 	/* where to put LDS, scratch, GPUVM in FSA64 space */
1302 	mutex_lock(&adev->srbm_mutex);
1303 	for (i = 0; i < 16; i++) {
1304 		soc15_grbm_select(adev, 0, 0, 0, i);
1305 		/* CP and shaders */
1306 		tmp = 0;
1307 		tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
1308 				    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1309 		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), tmp);
1310 		WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), 0);
1311 	}
1312 	soc15_grbm_select(adev, 0, 0, 0, 0);
1313 
1314 	mutex_unlock(&adev->srbm_mutex);
1315 
1316 	gfx_v9_0_init_compute_vmid(adev);
1317 
1318 	mutex_lock(&adev->grbm_idx_mutex);
1319 	/*
1320 	 * making sure that the following register writes will be broadcasted
1321 	 * to all the shaders
1322 	 */
1323 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1324 
1325 	WREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE),
1326 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
1327 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1328 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
1329 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1330 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
1331 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1332 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1333 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1334 	mutex_unlock(&adev->grbm_idx_mutex);
1335 
1336 }
1337 
1338 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
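/* Poll the RLC serdes CU master busy registers for every SE/SH, then the
 * non-CU masters, until they report idle or the timeout expires.
 */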
1339 {
1340 	u32 i, j, k;
1341 	u32 mask;
1342 
1343 	mutex_lock(&adev->grbm_idx_mutex);
1344 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1345 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1346 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1347 			for (k = 0; k < adev->usec_timeout; k++) {
1348 				if (RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY)) == 0)
1349 					break;
1350 				udelay(1);
1351 			}
1352 		}
1353 	}
1354 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1355 	mutex_unlock(&adev->grbm_idx_mutex);
1356 
1357 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
1358 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
1359 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
1360 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
1361 	for (k = 0; k < adev->usec_timeout; k++) {
1362 		if ((RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY)) & mask) == 0)
1363 			break;
1364 		udelay(1);
1365 	}
1366 }
1367 
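/* Gate the CP ring 0 busy/idle interrupts; note the enable path returns
 * early here, so only disabling actually touches CP_INT_CNTL_RING0.
 */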
1368 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
1369 					       bool enable)
1370 {
1371 	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
1372 
1373 	if (enable)
1374 		return;
1375 
1376 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
1377 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
1378 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
1379 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
1380 
1381 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), tmp);
1382 }
1383 
1384 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
1385 {
1386 	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
1387 
1388 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
1389 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp);
1390 
1391 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
1392 
1393 	gfx_v9_0_wait_for_rlc_serdes(adev);
1394 }
1395 
1396 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
1397 {
1398 	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
1399 
1400 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
1401 	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
1402 	udelay(50);
1403 	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
1404 	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
1405 	udelay(50);
1406 }
1407 
1408 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
1409 {
1410 #ifdef AMDGPU_RLC_DEBUG_RETRY
1411 	u32 rlc_ucode_ver;
1412 #endif
1413 	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
1414 
1415 	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
1416 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp);
1417 
1418 	/* APUs enable the CP interrupt only after the CP has been initialized */
1419 	if (!(adev->flags & AMD_IS_APU))
1420 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
1421 
1422 	udelay(50);
1423 
1424 #ifdef AMDGPU_RLC_DEBUG_RETRY
1425 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
1426 	rlc_ucode_ver = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6));
1427 	if (rlc_ucode_ver == 0x108) {
1428 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
1429 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
1430 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
1431 		 * default is 0x9C4 to create a 100us interval */
1432 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3), 0x9C4);
1433 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
1434 		 * to disable the page fault retry interrupts, default is
1435 		 * 0x100 (256) */
1436 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12), 0x100);
1437 	}
1438 #endif
1439 }
1440 
1441 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
1442 {
1443 	const struct rlc_firmware_header_v2_0 *hdr;
1444 	const __le32 *fw_data;
1445 	unsigned i, fw_size;
1446 
1447 	if (!adev->gfx.rlc_fw)
1448 		return -EINVAL;
1449 
1450 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1451 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
1452 
1453 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
1454 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1455 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1456 
1457 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR),
1458 			RLCG_UCODE_LOADING_START_ADDRESS);
1459 	for (i = 0; i < fw_size; i++)
1460 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA), le32_to_cpup(fw_data++));
1461 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR), adev->gfx.rlc_fw_version);
1462 
1463 	return 0;
1464 }
1465 
1466 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
1467 {
1468 	int r;
1469 
1470 	if (amdgpu_sriov_vf(adev))
1471 		return 0;
1472 
1473 	gfx_v9_0_rlc_stop(adev);
1474 
1475 	/* disable CG */
1476 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), 0);
1477 
1478 	/* disable PG */
1479 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), 0);
1480 
1481 	gfx_v9_0_rlc_reset(adev);
1482 
1483 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
1484 		/* legacy rlc firmware loading */
1485 		r = gfx_v9_0_rlc_load_microcode(adev);
1486 		if (r)
1487 			return r;
1488 	}
1489 
1490 	gfx_v9_0_rlc_start(adev);
1491 
1492 	return 0;
1493 }
1494 
1495 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
1496 {
1497 	int i;
1498 	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL));
1499 
1500 	if (enable) {
1501 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
1502 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
1503 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
1504 	} else {
1505 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
1506 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
1507 		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
1508 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1509 			adev->gfx.gfx_ring[i].ready = false;
1510 	}
1511 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL), tmp);
1512 	udelay(50);
1513 }
1514 
1515 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
1516 {
1517 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
1518 	const struct gfx_firmware_header_v1_0 *ce_hdr;
1519 	const struct gfx_firmware_header_v1_0 *me_hdr;
1520 	const __le32 *fw_data;
1521 	unsigned i, fw_size;
1522 
1523 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
1524 		return -EINVAL;
1525 
1526 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
1527 		adev->gfx.pfp_fw->data;
1528 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
1529 		adev->gfx.ce_fw->data;
1530 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
1531 		adev->gfx.me_fw->data;
1532 
1533 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
1534 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
1535 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
1536 
1537 	gfx_v9_0_cp_gfx_enable(adev, false);
1538 
1539 	/* PFP */
1540 	fw_data = (const __le32 *)
1541 		(adev->gfx.pfp_fw->data +
1542 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
1543 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
1544 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), 0);
1545 	for (i = 0; i < fw_size; i++)
1546 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA), le32_to_cpup(fw_data++));
1547 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR), adev->gfx.pfp_fw_version);
1548 
1549 	/* CE */
1550 	fw_data = (const __le32 *)
1551 		(adev->gfx.ce_fw->data +
1552 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
1553 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
1554 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), 0);
1555 	for (i = 0; i < fw_size; i++)
1556 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA), le32_to_cpup(fw_data++));
1557 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR), adev->gfx.ce_fw_version);
1558 
1559 	/* ME */
1560 	fw_data = (const __le32 *)
1561 		(adev->gfx.me_fw->data +
1562 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
1563 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
1564 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), 0);
1565 	for (i = 0; i < fw_size; i++)
1566 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_DATA), le32_to_cpup(fw_data++));
1567 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_RAM_WADDR), adev->gfx.me_fw_version);
1568 
1569 	return 0;
1570 }
1571 
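/* Size, in dwords, of the clear-state packet stream emitted by
 * gfx_v9_0_cp_gfx_start() below.
 */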
1572 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1573 {
1574 	u32 count = 0;
1575 	const struct cs_section_def *sect = NULL;
1576 	const struct cs_extent_def *ext = NULL;
1577 
1578 	/* begin clear state */
1579 	count += 2;
1580 	/* context control state */
1581 	count += 3;
1582 
1583 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1584 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1585 			if (sect->id == SECT_CONTEXT)
1586 				count += 2 + ext->reg_count;
1587 			else
1588 				return 0;
1589 		}
1590 	}
1591 	/* pa_sc_raster_config/pa_sc_raster_config1 */
1592 	count += 4;
1593 	/* end clear state */
1594 	count += 2;
1595 	/* clear state */
1596 	count += 2;
1597 
1598 	return count;
1599 }
1600 
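/* Initialize the CP and emit the clear state (context register defaults)
 * through the gfx ring.
 */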
1601 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
1602 {
1603 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1604 	const struct cs_section_def *sect = NULL;
1605 	const struct cs_extent_def *ext = NULL;
1606 	int r, i;
1607 
1608 	/* init the CP */
1609 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT), adev->gfx.config.max_hw_contexts - 1);
1610 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID), 1);
1611 
1612 	gfx_v9_0_cp_gfx_enable(adev, true);
1613 
1614 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4);
1615 	if (r) {
1616 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
1617 		return r;
1618 	}
1619 
1620 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1621 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1622 
1623 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1624 	amdgpu_ring_write(ring, 0x80000000);
1625 	amdgpu_ring_write(ring, 0x80000000);
1626 
1627 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1628 		for (ext = sect->section; ext->extent != NULL; ++ext) {
1629 			if (sect->id == SECT_CONTEXT) {
1630 				amdgpu_ring_write(ring,
1631 				       PACKET3(PACKET3_SET_CONTEXT_REG,
1632 					       ext->reg_count));
1633 				amdgpu_ring_write(ring,
1634 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
1635 				for (i = 0; i < ext->reg_count; i++)
1636 					amdgpu_ring_write(ring, ext->extent[i]);
1637 			}
1638 		}
1639 	}
1640 
1641 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1642 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
1643 
1644 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
1645 	amdgpu_ring_write(ring, 0);
1646 
1647 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
1648 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
1649 	amdgpu_ring_write(ring, 0x8000);
1650 	amdgpu_ring_write(ring, 0x8000);
1651 
1652 	amdgpu_ring_commit(ring);
1653 
1654 	return 0;
1655 }
1656 
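/* Program the gfx ring buffer (CP_RB0_*) registers: size, read/write
 * pointers, write-back addresses and doorbell, then start the ring.
 */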
1657 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
1658 {
1659 	struct amdgpu_ring *ring;
1660 	u32 tmp;
1661 	u32 rb_bufsz;
1662 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
1663 
1664 	/* Set the write pointer delay */
1665 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY), 0);
1666 
1667 	/* set the RB to use vmid 0 */
1668 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID), 0);
1669 
1670 	/* Set ring buffer size */
1671 	ring = &adev->gfx.gfx_ring[0];
1672 	rb_bufsz = order_base_2(ring->ring_size / 8);
1673 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
1674 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
1675 #ifdef __BIG_ENDIAN
1676 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
1677 #endif
1678 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp);
1679 
1680 	/* Initialize the ring buffer's write pointers */
1681 	ring->wptr = 0;
1682 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr));
1683 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr));
1684 
1685 	/* set the wb address whether it's enabled or not */
1686 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
1687 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR), lower_32_bits(rptr_addr));
1688 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI), upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
1689 
1690 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
1691 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO), lower_32_bits(wptr_gpu_addr));
1692 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI), upper_32_bits(wptr_gpu_addr));
1693 
1694 	mdelay(1);
1695 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL), tmp);
1696 
1697 	rb_addr = ring->gpu_addr >> 8;
1698 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE), rb_addr);
1699 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI), upper_32_bits(rb_addr));
1700 
1701 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL));
1702 	if (ring->use_doorbell) {
1703 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
1704 				    DOORBELL_OFFSET, ring->doorbell_index);
1705 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
1706 				    DOORBELL_EN, 1);
1707 	} else {
1708 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
1709 	}
1710 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_CONTROL), tmp);
1711 
1712 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
1713 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
1714 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER), tmp);
1715 
1716 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER),
1717 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
1718 
1719 
1720 	/* start the ring */
1721 	gfx_v9_0_cp_gfx_start(adev);
1722 	ring->ready = true;
1723 
1724 	return 0;
1725 }
1726 
1727 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
1728 {
1729 	int i;
1730 
1731 	if (enable) {
1732 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL), 0);
1733 	} else {
1734 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL),
1735 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
1736 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
1737 			adev->gfx.compute_ring[i].ready = false;
1738 		adev->gfx.kiq.ring.ready = false;
1739 	}
1740 	udelay(50);
1741 }
1742 
1743 static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)
1744 {
1745 	gfx_v9_0_cp_compute_enable(adev, true);
1746 
1747 	return 0;
1748 }
1749 
1750 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
1751 {
1752 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1753 	const __le32 *fw_data;
1754 	unsigned i;
1755 	u32 tmp;
1756 
1757 	if (!adev->gfx.mec_fw)
1758 		return -EINVAL;
1759 
1760 	gfx_v9_0_cp_compute_enable(adev, false);
1761 
1762 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1763 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
1764 
1765 	fw_data = (const __le32 *)
1766 		(adev->gfx.mec_fw->data +
1767 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1768 	tmp = 0;
1769 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
1770 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
1771 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_CNTL), tmp);
1772 
1773 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_LO),
1774 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
1775 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_IC_BASE_HI),
1776 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
1777 
1778 	/* MEC1 */
1779 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR),
1780 			 mec_hdr->jt_offset);
1781 	for (i = 0; i < mec_hdr->jt_size; i++)
1782 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA),
1783 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
1784 
1785 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR),
1786 			adev->gfx.mec_fw_version);
1787 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
1788 
1789 	return 0;
1790 }
1791 
1792 static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
1793 {
1794 	int i, r;
1795 
1796 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1797 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
1798 
1799 		if (ring->mqd_obj) {
1800 			r = amdgpu_bo_reserve(ring->mqd_obj, false);
1801 			if (unlikely(r != 0))
1802 				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
1803 
1804 			amdgpu_bo_unpin(ring->mqd_obj);
1805 			amdgpu_bo_unreserve(ring->mqd_obj);
1806 
1807 			amdgpu_bo_unref(&ring->mqd_obj);
1808 			ring->mqd_obj = NULL;
1809 		}
1810 	}
1811 }
1812 
1813 static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);
1814 
1815 static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
1816 {
1817 	int i, r;
1818 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1819 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
1820 		if (gfx_v9_0_init_queue(ring))
1821 			dev_warn(adev->dev, "compute queue %d init failed!\n", i);
1822 	}
1823 
1824 	r = gfx_v9_0_cp_compute_start(adev);
1825 	if (r)
1826 		return r;
1827 
1828 	return 0;
1829 }
1830 
1831 /* KIQ functions */
1832 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
1833 {
1834 	uint32_t tmp;
1835 	struct amdgpu_device *adev = ring->adev;
1836 
1837 	/* tell RLC which is KIQ queue */
1838 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
1839 	tmp &= 0xffffff00;
1840 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
1841 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp);
1842 	tmp |= 0x80;
1843 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), tmp);
1844 }
1845 
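/* Tell the KIQ which queue resources it owns via a SET_RESOURCES packet. */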
1846 static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring)
1847 {
1848 	amdgpu_ring_alloc(ring, 8);
1849 	/* set resources */
1850 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
1851 	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
1852 	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
1853 	amdgpu_ring_write(ring, 0);	/* queue mask hi */
1854 	amdgpu_ring_write(ring, 0);	/* gws mask lo */
1855 	amdgpu_ring_write(ring, 0);	/* gws mask hi */
1856 	amdgpu_ring_write(ring, 0);	/* oac mask */
1857 	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
1858 	amdgpu_ring_commit(ring);
1859 	udelay(50);
1860 }
1861 
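/* Map a compute ring's hardware queue through the KIQ by submitting a
 * MAP_QUEUES packet that points at the ring's MQD and wptr write-back.
 */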
1862 static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
1863 				   struct amdgpu_ring *ring)
1864 {
1865 	struct amdgpu_device *adev = kiq_ring->adev;
1866 	uint64_t mqd_addr, wptr_addr;
1867 
1868 	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
1869 	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
1870 	amdgpu_ring_alloc(kiq_ring, 8);
1871 
1872 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
1873 	/* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
1874 	amdgpu_ring_write(kiq_ring,
1875 			  (0 << 4) | /* Queue_Sel */
1876 			  (0 << 8) | /* VMID */
1877 			  (ring->queue << 13) |
1878 			  (ring->pipe << 16) |
1879 			  ((ring->me == 1 ? 0 : 1) << 18) |
1880 			  (0 << 21) | /*queue_type: normal compute queue */
1881 			  (1 << 24) | /* alloc format: all_on_one_pipe */
1882 			  (0 << 26) | /* engine_sel: compute */
1883 			  (1 << 29)); /* num_queues: must be 1 */
1884 	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2));
1885 	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
1886 	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
1887 	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
1888 	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
1889 	amdgpu_ring_commit(kiq_ring);
1890 	udelay(50);
1891 }
1892 
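/* Fill the memory queue descriptor (MQD) for a compute/KIQ queue from the
 * ring state; the contents are later committed to the CP_HQD_* registers.
 */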
1893 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
1894 {
1895 	struct amdgpu_device *adev = ring->adev;
1896 	struct v9_mqd *mqd = ring->mqd_ptr;
1897 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
1898 	uint32_t tmp;
1899 
1900 	mqd->header = 0xC0310800;
1901 	mqd->compute_pipelinestat_enable = 0x00000001;
1902 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
1903 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
1904 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
1905 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
1906 	mqd->compute_misc_reserved = 0x00000003;
1907 
1908 	eop_base_addr = ring->eop_gpu_addr >> 8;
1909 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
1910 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
1911 
1912 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
1913 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL));
1914 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
1915 			(order_base_2(MEC_HPD_SIZE / 4) - 1));
1916 
1917 	mqd->cp_hqd_eop_control = tmp;
1918 
1919 	/* enable doorbell? */
1920 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
1921 
1922 	if (ring->use_doorbell) {
1923 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1924 				    DOORBELL_OFFSET, ring->doorbell_index);
1925 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1926 				    DOORBELL_EN, 1);
1927 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1928 				    DOORBELL_SOURCE, 0);
1929 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1930 				    DOORBELL_HIT, 0);
1931 	} else {
1932 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1933 				    DOORBELL_EN, 0);
1934 	}
1935 
1936 	mqd->cp_hqd_pq_doorbell_control = tmp;
1937 
1938 	/* disable the queue if it's active */
1939 	ring->wptr = 0;
1940 	mqd->cp_hqd_dequeue_request = 0;
1941 	mqd->cp_hqd_pq_rptr = 0;
1942 	mqd->cp_hqd_pq_wptr_lo = 0;
1943 	mqd->cp_hqd_pq_wptr_hi = 0;
1944 
1945 	/* set the pointer to the MQD */
1946 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
1947 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
1948 
1949 	/* set MQD vmid to 0 */
1950 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
1951 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
1952 	mqd->cp_mqd_control = tmp;
1953 
1954 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
1955 	hqd_gpu_addr = ring->gpu_addr >> 8;
1956 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
1957 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
1958 
1959 	/* set up the HQD, this is similar to CP_RB0_CNTL */
1960 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL));
1961 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
1962 			    (order_base_2(ring->ring_size / 4) - 1));
1963 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
1964 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
1965 #ifdef __BIG_ENDIAN
1966 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
1967 #endif
1968 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
1969 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
1970 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
1971 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
1972 	mqd->cp_hqd_pq_control = tmp;
1973 
1974 	/* set the wb address whether it's enabled or not */
1975 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
1976 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
1977 	mqd->cp_hqd_pq_rptr_report_addr_hi =
1978 		upper_32_bits(wb_gpu_addr) & 0xffff;
1979 
1980 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
1981 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
1982 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
1983 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
1984 
1985 	tmp = 0;
1986 	/* enable the doorbell if requested */
1987 	if (ring->use_doorbell) {
1988 		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
1989 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1990 				DOORBELL_OFFSET, ring->doorbell_index);
1991 
1992 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1993 					 DOORBELL_EN, 1);
1994 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1995 					 DOORBELL_SOURCE, 0);
1996 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
1997 					 DOORBELL_HIT, 0);
1998 	}
1999 
2000 	mqd->cp_hqd_pq_doorbell_control = tmp;
2001 
2002 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2003 	ring->wptr = 0;
2004 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
2005 
2006 	/* set the vmid for the queue */
2007 	mqd->cp_hqd_vmid = 0;
2008 
2009 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
2010 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2011 	mqd->cp_hqd_persistent_state = tmp;
2012 
2013 	/* activate the queue */
2014 	mqd->cp_hqd_active = 1;
2015 
2016 	return 0;
2017 }
2018 
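/* Commit the MQD contents to the CP_HQD_* registers of the queue currently
 * selected via soc15_grbm_select(); the caller holds adev->srbm_mutex.
 */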
2019 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2020 {
2021 	struct amdgpu_device *adev = ring->adev;
2022 	struct v9_mqd *mqd = ring->mqd_ptr;
2023 	uint32_t tmp;
2024 	int j;
2025 
2026 	/* disable wptr polling */
2027 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL));
2028 	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2029 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL), tmp);
2030 
2031 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
2032 	       mqd->cp_hqd_eop_base_addr_lo);
2033 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
2034 	       mqd->cp_hqd_eop_base_addr_hi);
2035 
2036 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2037 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL),
2038 	       mqd->cp_hqd_eop_control);
2039 
2040 	/* enable doorbell? */
2041 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
2042 	       mqd->cp_hqd_pq_doorbell_control);
2043 
2044 	/* disable the queue if it's active */
2045 	if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) {
2046 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1);
2047 		for (j = 0; j < adev->usec_timeout; j++) {
2048 			if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1))
2049 				break;
2050 			udelay(1);
2051 		}
2052 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
2053 		       mqd->cp_hqd_dequeue_request);
2054 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR),
2055 		       mqd->cp_hqd_pq_rptr);
2056 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
2057 		       mqd->cp_hqd_pq_wptr_lo);
2058 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
2059 		       mqd->cp_hqd_pq_wptr_hi);
2060 	}
2061 
2062 	/* set the pointer to the MQD */
2063 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR),
2064 	       mqd->cp_mqd_base_addr_lo);
2065 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI),
2066 	       mqd->cp_mqd_base_addr_hi);
2067 
2068 	/* set MQD vmid to 0 */
2069 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL),
2070 	       mqd->cp_mqd_control);
2071 
2072 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2073 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE),
2074 	       mqd->cp_hqd_pq_base_lo);
2075 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI),
2076 	       mqd->cp_hqd_pq_base_hi);
2077 
2078 	/* set up the HQD, this is similar to CP_RB0_CNTL */
2079 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL),
2080 	       mqd->cp_hqd_pq_control);
2081 
2082 	/* set the wb address whether it's enabled or not */
2083 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR),
2084 				mqd->cp_hqd_pq_rptr_report_addr_lo);
2085 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI),
2086 				mqd->cp_hqd_pq_rptr_report_addr_hi);
2087 
2088 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2089 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
2090 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
2091 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
2092 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
2093 
2094 	/* enable the doorbell if requested */
2095 	if (ring->use_doorbell) {
2096 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER),
2097 					(AMDGPU_DOORBELL64_KIQ *2) << 2);
2098 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER),
2099 					(AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2);
2100 	}
2101 
2102 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
2103 	       mqd->cp_hqd_pq_doorbell_control);
2104 
2105 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2106 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
2107 	       mqd->cp_hqd_pq_wptr_lo);
2108 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
2109 	       mqd->cp_hqd_pq_wptr_hi);
2110 
2111 	/* set the vmid for the queue */
2112 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid);
2113 
2114 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE),
2115 	       mqd->cp_hqd_persistent_state);
2116 
2117 	/* activate the queue */
2118 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE),
2119 	       mqd->cp_hqd_active);
2120 
2121 	if (ring->use_doorbell) {
2122 		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS));
2123 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
2124 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS), tmp);
2125 	}
2126 
2127 	return 0;
2128 }
2129 
2130 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
2131 {
2132 	struct amdgpu_device *adev = ring->adev;
2133 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
2134 	struct v9_mqd *mqd = ring->mqd_ptr;
2135 	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
2136 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
2137 
2138 	if (is_kiq) {
2139 		gfx_v9_0_kiq_setting(&kiq->ring);
2140 	} else {
2141 		mqd_idx = ring - &adev->gfx.compute_ring[0];
2142 	}
2143 
2144 	if (!adev->gfx.in_reset) {
2145 		memset((void *)mqd, 0, sizeof(*mqd));
2146 		mutex_lock(&adev->srbm_mutex);
2147 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2148 		gfx_v9_0_mqd_init(ring);
2149 		if (is_kiq)
2150 			gfx_v9_0_kiq_init_register(ring);
2151 		soc15_grbm_select(adev, 0, 0, 0, 0);
2152 		mutex_unlock(&adev->srbm_mutex);
2153 
2154 	} else { /* for GPU_RESET case */
2155 		/* reset MQD to a clean status */
2156 
2157 		/* reset ring buffer */
2158 		ring->wptr = 0;
2159 
2160 		if (is_kiq) {
2161 			mutex_lock(&adev->srbm_mutex);
2162 			soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
2163 			gfx_v9_0_kiq_init_register(ring);
2164 			soc15_grbm_select(adev, 0, 0, 0, 0);
2165 			mutex_unlock(&adev->srbm_mutex);
2166 		}
2167 	}
2168 
2169 	if (is_kiq)
2170 		gfx_v9_0_kiq_enable(ring);
2171 	else
2172 		gfx_v9_0_map_queue_enable(&kiq->ring, ring);
2173 
2174 	return 0;
2175 }
2176 
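/* Bring up the KIQ first, then initialize and map each compute queue
 * through it.
 */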
2177 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
2178 {
2179 	struct amdgpu_ring *ring = NULL;
2180 	int r = 0, i;
2181 
2182 	gfx_v9_0_cp_compute_enable(adev, true);
2183 
2184 	ring = &adev->gfx.kiq.ring;
2185 
2186 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
2187 	if (unlikely(r != 0))
2188 		goto done;
2189 
2190 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2191 	if (!r) {
2192 		r = gfx_v9_0_kiq_init_queue(ring);
2193 		amdgpu_bo_kunmap(ring->mqd_obj);
2194 		ring->mqd_ptr = NULL;
2195 	}
2196 	amdgpu_bo_unreserve(ring->mqd_obj);
2197 	if (r)
2198 		goto done;
2199 
2200 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2201 		ring = &adev->gfx.compute_ring[i];
2202 
2203 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
2204 		if (unlikely(r != 0))
2205 			goto done;
2206 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
2207 		if (!r) {
2208 			r = gfx_v9_0_kiq_init_queue(ring);
2209 			amdgpu_bo_kunmap(ring->mqd_obj);
2210 			ring->mqd_ptr = NULL;
2211 		}
2212 		amdgpu_bo_unreserve(ring->mqd_obj);
2213 		if (r)
2214 			goto done;
2215 	}
2216 
2217 done:
2218 	return r;
2219 }
2220 
2221 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
2222 {
2223 	int r, i;
2224 	struct amdgpu_ring *ring;
2225 
2226 	if (!(adev->flags & AMD_IS_APU))
2227 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2228 
2229 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2230 		/* legacy firmware loading */
2231 		r = gfx_v9_0_cp_gfx_load_microcode(adev);
2232 		if (r)
2233 			return r;
2234 
2235 		r = gfx_v9_0_cp_compute_load_microcode(adev);
2236 		if (r)
2237 			return r;
2238 	}
2239 
2240 	r = gfx_v9_0_cp_gfx_resume(adev);
2241 	if (r)
2242 		return r;
2243 
2244 	if (amdgpu_sriov_vf(adev))
2245 		r = gfx_v9_0_kiq_resume(adev);
2246 	else
2247 		r = gfx_v9_0_cp_compute_resume(adev);
2248 	if (r)
2249 		return r;
2250 
2251 	ring = &adev->gfx.gfx_ring[0];
2252 	r = amdgpu_ring_test_ring(ring);
2253 	if (r) {
2254 		ring->ready = false;
2255 		return r;
2256 	}
2257 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2258 		ring = &adev->gfx.compute_ring[i];
2259 
2260 		ring->ready = true;
2261 		r = amdgpu_ring_test_ring(ring);
2262 		if (r)
2263 			ring->ready = false;
2264 	}
2265 
2266 	if (amdgpu_sriov_vf(adev)) {
2267 		ring = &adev->gfx.kiq.ring;
2268 		ring->ready = true;
2269 		r = amdgpu_ring_test_ring(ring);
2270 		if (r)
2271 			ring->ready = false;
2272 	}
2273 
2274 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2275 
2276 	return 0;
2277 }
2278 
2279 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
2280 {
2281 	gfx_v9_0_cp_gfx_enable(adev, enable);
2282 	gfx_v9_0_cp_compute_enable(adev, enable);
2283 }
2284 
2285 static int gfx_v9_0_hw_init(void *handle)
2286 {
2287 	int r;
2288 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2289 
2290 	gfx_v9_0_init_golden_registers(adev);
2291 
2292 	gfx_v9_0_gpu_init(adev);
2293 
2294 	r = gfx_v9_0_rlc_resume(adev);
2295 	if (r)
2296 		return r;
2297 
2298 	r = gfx_v9_0_cp_resume(adev);
2299 	if (r)
2300 		return r;
2301 
2302 	r = gfx_v9_0_ngg_en(adev);
2303 	if (r)
2304 		return r;
2305 
2306 	return r;
2307 }
2308 
2309 static int gfx_v9_0_hw_fini(void *handle)
2310 {
2311 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2312 
2313 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
2314 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
2315 	if (amdgpu_sriov_vf(adev)) {
2316 		pr_debug("For SRIOV client, shouldn't do anything.\n");
2317 		return 0;
2318 	}
2319 	gfx_v9_0_cp_enable(adev, false);
2320 	gfx_v9_0_rlc_stop(adev);
2321 	gfx_v9_0_cp_compute_fini(adev);
2322 
2323 	return 0;
2324 }
2325 
2326 static int gfx_v9_0_suspend(void *handle)
2327 {
2328 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2329 
2330 	return gfx_v9_0_hw_fini(adev);
2331 }
2332 
2333 static int gfx_v9_0_resume(void *handle)
2334 {
2335 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2336 
2337 	return gfx_v9_0_hw_init(adev);
2338 }
2339 
2340 static bool gfx_v9_0_is_idle(void *handle)
2341 {
2342 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2343 
2344 	if (REG_GET_FIELD(RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)),
2345 				GRBM_STATUS, GUI_ACTIVE))
2346 		return false;
2347 	else
2348 		return true;
2349 }
2350 
2351 static int gfx_v9_0_wait_for_idle(void *handle)
2352 {
2353 	unsigned i;
2354 	u32 tmp;
2355 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2356 
2357 	for (i = 0; i < adev->usec_timeout; i++) {
2358 		/* read MC_STATUS */
2359 		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)) &
2360 			GRBM_STATUS__GUI_ACTIVE_MASK;
2361 
2362 		if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
2363 			return 0;
2364 		udelay(1);
2365 	}
2366 	return -ETIMEDOUT;
2367 }
2368 
2369 static void gfx_v9_0_print_status(void *handle)
2370 {
2371 	int i;
2372 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2373 
2374 	dev_info(adev->dev, "GFX 9.x registers\n");
2375 	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
2376 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)));
2377 	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
2378 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)));
2379 	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
2380 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0)));
2381 	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
2382 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1)));
2383 	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
2384 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2)));
2385 	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
2386 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3)));
2387 	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STAT)));
2388 	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
2389 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1)));
2390 	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
2391 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2)));
2392 	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
2393 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3)));
2394 	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
2395 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT)));
2396 	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
2397 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1)));
2398 	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS)));
2399 	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_BUSY_STAT)));
2400 	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
2401 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1)));
2402 	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS)));
2403 
2404 	for (i = 0; i < 32; i++) {
2405 		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
2406 			 i, RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_TILE_MODE0 ) + i*4));
2407 	}
2408 	for (i = 0; i < 16; i++) {
2409 		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
2410 			 i, RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_MACROTILE_MODE0) + i*4));
2411 	}
2412 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2413 		dev_info(adev->dev, "  se: %d\n", i);
2414 		gfx_v9_0_select_se_sh(adev, i, 0xffffffff, 0xffffffff);
2415 		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
2416 			 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_RASTER_CONFIG)));
2417 		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
2418 			 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_RASTER_CONFIG_1)));
2419 	}
2420 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2421 
2422 	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
2423 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)));
2424 
2425 	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
2426 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEQ_THRESHOLDS)));
2427 	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
2428 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSX_DEBUG_1)));
2429 	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
2430 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX)));
2431 	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
2432 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL)));
2433 	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
2434 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG)));
2435 	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
2436 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG)));
2437 	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
2438 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2)));
2439 	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
2440 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG3)));
2441 	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
2442 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL)));
2443 	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
2444 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1)));
2445 	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
2446 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE)));
2447 	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
2448 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_NUM_INSTANCES)));
2449 	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
2450 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL)));
2451 	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
2452 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FORCE_EOV_MAX_CNTS)));
2453 	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
2454 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION)));
2455 	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
2456 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_GS_VERTEX_REUSE)));
2457 	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
2458 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE)));
2459 	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
2460 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_CL_ENHANCE)));
2461 	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
2462 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE)));
2463 
2464 	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
2465 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)));
2466 	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
2467 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT)));
2468 	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
2469 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID)));
2470 
2471 	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
2472 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_SEM_WAIT_TIMER)));
2473 
2474 	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
2475 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY)));
2476 	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
2477 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID)));
2478 	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
2479 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL)));
2480 	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
2481 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR)));
2482 	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
2483 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR)));
2484 	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
2485 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI)));
2486 	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
2487 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL)));
2488 	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
2489 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE)));
2490 	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
2491 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI)));
2492 	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
2493 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL)));
2494 
2495 	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
2496 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_ADDR)));
2497 	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
2498 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_UMSK)));
2499 
2500 	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
2501 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)));
2502 	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
2503 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTL)));
2504 	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
2505 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)));
2506 	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
2507 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)));
2508 	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
2509 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTR_INIT)));
2510 	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
2511 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTR_MAX)));
2512 	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
2513 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_INIT_CU_MASK)));
2514 	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
2515 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_PARAMS)));
2516 	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
2517 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTL)));
2518 	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
2519 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_UCODE_CNTL)));
2520 
2521 	dev_info(adev->dev, "  RLC_GPM_GENERAL_6=0x%08X\n",
2522 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6)));
2523 	dev_info(adev->dev, "  RLC_GPM_GENERAL_12=0x%08X\n",
2524 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12)));
2525 	dev_info(adev->dev, "  RLC_GPM_TIMER_INT_3=0x%08X\n",
2526 		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3)));
2527 	mutex_lock(&adev->srbm_mutex);
2528 	for (i = 0; i < 16; i++) {
2529 		soc15_grbm_select(adev, 0, 0, 0, i);
2530 		dev_info(adev->dev, "  VM %d:\n", i);
2531 		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
2532 			 RREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG)));
2533 		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
2534 			 RREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES)));
2535 	}
2536 	soc15_grbm_select(adev, 0, 0, 0, 0);
2537 	mutex_unlock(&adev->srbm_mutex);
2538 }
2539 
2540 static int gfx_v9_0_soft_reset(void *handle)
2541 {
2542 	u32 grbm_soft_reset = 0;
2543 	u32 tmp;
2544 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2545 
2546 	/* GRBM_STATUS */
2547 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS));
2548 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
2549 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
2550 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
2551 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
2552 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
2553 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
2554 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
2555 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
2556 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
2557 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
2558 	}
2559 
2560 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
2561 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
2562 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
2563 	}
2564 
2565 	/* GRBM_STATUS2 */
2566 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2));
2567 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
2568 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
2569 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2570 
2571 
2572 	if (grbm_soft_reset ) {
2573 		gfx_v9_0_print_status((void *)adev);
2574 		/* stop the rlc */
2575 		gfx_v9_0_rlc_stop(adev);
2576 
2577 		/* Disable GFX parsing/prefetching */
2578 		gfx_v9_0_cp_gfx_enable(adev, false);
2579 
2580 		/* Disable MEC parsing/prefetching */
2581 		gfx_v9_0_cp_compute_enable(adev, false);
2582 
2583 		if (grbm_soft_reset) {
2584 			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
2585 			tmp |= grbm_soft_reset;
2586 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
2587 			WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
2588 			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
2589 
2590 			udelay(50);
2591 
2592 			tmp &= ~grbm_soft_reset;
2593 			WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
2594 			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
2595 		}
2596 
2597 		/* Wait a little for things to settle down */
2598 		udelay(50);
2599 		gfx_v9_0_print_status((void *)adev);
2600 	}
2601 	return 0;
2602 }
2603 
2604 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
2605 {
2606 	uint64_t clock;
2607 
2608 	mutex_lock(&adev->gfx.gpu_clock_mutex);
2609 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT), 1);
2610 	clock = (uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB)) |
2611 		((uint64_t)RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB)) << 32ULL);
2612 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
2613 	return clock;
2614 }
2615 
2616 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
2617 					  uint32_t vmid,
2618 					  uint32_t gds_base, uint32_t gds_size,
2619 					  uint32_t gws_base, uint32_t gws_size,
2620 					  uint32_t oa_base, uint32_t oa_size)
2621 {
2622 	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
2623 	gds_size = gds_size >> AMDGPU_GDS_SHIFT;
2624 
2625 	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
2626 	gws_size = gws_size >> AMDGPU_GWS_SHIFT;
2627 
2628 	oa_base = oa_base >> AMDGPU_OA_SHIFT;
2629 	oa_size = oa_size >> AMDGPU_OA_SHIFT;
2630 
2631 	/* GDS Base */
2632 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2633 				   amdgpu_gds_reg_offset[vmid].mem_base,
2634 				   gds_base);
2635 
2636 	/* GDS Size */
2637 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2638 				   amdgpu_gds_reg_offset[vmid].mem_size,
2639 				   gds_size);
2640 
2641 	/* GWS */
2642 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2643 				   amdgpu_gds_reg_offset[vmid].gws,
2644 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
2645 
2646 	/* OA */
2647 	gfx_v9_0_write_data_to_reg(ring, 0, false,
2648 				   amdgpu_gds_reg_offset[vmid].oa,
2649 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
2650 }
2651 
2652 static int gfx_v9_0_early_init(void *handle)
2653 {
2654 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2655 
2656 	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
2657 	adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS;
2658 	gfx_v9_0_set_ring_funcs(adev);
2659 	gfx_v9_0_set_irq_funcs(adev);
2660 	gfx_v9_0_set_gds_init(adev);
2661 	gfx_v9_0_set_rlc_funcs(adev);
2662 
2663 	return 0;
2664 }
2665 
2666 static int gfx_v9_0_late_init(void *handle)
2667 {
2668 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2669 	int r;
2670 
2671 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
2672 	if (r)
2673 		return r;
2674 
2675 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
2676 	if (r)
2677 		return r;
2678 
2679 	return 0;
2680 }
2681 
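/* Request RLC safe mode so clock-gating registers can be modified safely. */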
2682 static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
2683 {
2684 	uint32_t rlc_setting, data;
2685 	unsigned i;
2686 
2687 	if (adev->gfx.rlc.in_safe_mode)
2688 		return;
2689 
2690 	/* if RLC is not enabled, do nothing */
2691 	rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
2692 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
2693 		return;
2694 
2695 	if (adev->cg_flags &
2696 	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG |
2697 	     AMD_CG_SUPPORT_GFX_3D_CGCG)) {
2698 		data = RLC_SAFE_MODE__CMD_MASK;
2699 		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
2700 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data);
2701 
2702 		/* wait for RLC_SAFE_MODE */
2703 		for (i = 0; i < adev->usec_timeout; i++) {
2704 			if (!REG_GET_FIELD(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
2705 				break;
2706 			udelay(1);
2707 		}
2708 		adev->gfx.rlc.in_safe_mode = true;
2709 	}
2710 }
2711 
2712 static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
2713 {
2714 	uint32_t rlc_setting, data;
2715 
2716 	if (!adev->gfx.rlc.in_safe_mode)
2717 		return;
2718 
2719 	/* if RLC is not enabled, do nothing */
2720 	rlc_setting = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
2721 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
2722 		return;
2723 
2724 	if (adev->cg_flags &
2725 	    (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
2726 		/*
2727 		 * Try to exit safe mode only if it is already in safe
2728 		 * mode.
2729 		 */
2730 		data = RLC_SAFE_MODE__CMD_MASK;
2731 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SAFE_MODE), data);
2732 		adev->gfx.rlc.in_safe_mode = false;
2733 	}
2734 }
2735 
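/* Enable or disable medium grain clock gating (MGCG) and memory light
 * sleep (MGLS) by programming the RLC/CP override and sleep controls.
 */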
2736 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
2737 						      bool enable)
2738 {
2739 	uint32_t data, def;
2740 
2741 	/* It is disabled by HW by default */
2742 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
2743 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
2744 		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
2745 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
2746 			  RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
2747 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
2748 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
2749 
2750 		/* only for Vega10 & Raven1 */
2751 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
2752 
2753 		if (def != data)
2754 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
2755 
2756 		/* MGLS is a global flag to control all MGLS in GFX */
2757 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
2758 			/* 2 - RLC memory Light sleep */
2759 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
2760 				def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
2761 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
2762 				if (def != data)
2763 					WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data);
2764 			}
2765 			/* 3 - CP memory Light sleep */
2766 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
2767 				def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
2768 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
2769 				if (def != data)
2770 					WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data);
2771 			}
2772 		}
2773 	} else {
2774 		/* 1 - MGCG_OVERRIDE */
2775 		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
2776 		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK |
2777 			 RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
2778 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
2779 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
2780 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
2781 		if (def != data)
2782 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
2783 
2784 		/* 2 - disable MGLS in RLC */
2785 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
2786 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
2787 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
2788 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL), data);
2789 		}
2790 
2791 		/* 3 - disable MGLS in CP */
2792 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
2793 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
2794 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
2795 			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL), data);
2796 		}
2797 	}
2798 }
2799 
2800 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
2801 					   bool enable)
2802 {
2803 	uint32_t data, def;
2804 
2805 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
2806 
2807 	/* Enable 3D CGCG/CGLS */
2808 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
2809 		/* write cmd to clear cgcg/cgls ov */
2810 		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
2811 		/* unset CGCG override */
2812 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
2813 		/* update CGCG and CGLS override bits */
2814 		if (def != data)
2815 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
2816 		/* enable 3Dcgcg FSM(0x0020003f) */
2817 		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
2818 		data = (0x2000 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
2819 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
2820 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
2821 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
2822 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
2823 		if (def != data)
2824 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data);
2825 
2826 		/* set IDLE_POLL_COUNT(0x00900100) */
2827 		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2828 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
2829 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2830 		if (def != data)
2831 			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2832 	} else {
2833 		/* Disable CGCG/CGLS */
2834 		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
2835 		/* disable cgcg, cgls should be disabled */
2836 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
2837 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
2838 		/* disable cgcg and cgls in FSM */
2839 		if (def != data)
2840 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D), data);
2841 	}
2842 
2843 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
2844 }
2845 
2846 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
2847 						      bool enable)
2848 {
2849 	uint32_t def, data;
2850 
2851 	adev->gfx.rlc.funcs->enter_safe_mode(adev);
2852 
2853 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
2854 		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
2855 		/* unset CGCG override */
2856 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
2857 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
2858 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
2859 		else
2860 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
2861 		/* update CGCG and CGLS override bits */
2862 		if (def != data)
2863 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE), data);
2864 
2865 		/* enable cgcg FSM(0x0020003F) */
2866 		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
2867 		data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
2868 			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
2869 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
2870 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
2871 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
2872 		if (def != data)
2873 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data);
2874 
2875 		/* set IDLE_POLL_COUNT(0x00900100) */
2876 		def = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2877 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
2878 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2879 		if (def != data)
2880 			WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2881 	} else {
2882 		def = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
2883 		/* reset CGCG/CGLS bits */
2884 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
2885 		/* disable cgcg and cgls in FSM */
2886 		if (def != data)
2887 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL), data);
2888 	}
2889 
2890 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
2891 }
2892 
2893 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
2894 					    bool enable)
2895 {
2896 	if (enable) {
2897 		/* CGCG/CGLS should be enabled after MGCG/MGLS
2898 		 * ===  MGCG + MGLS ===
2899 		 */
2900 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
2901 		/* ===  CGCG /CGLS for GFX 3D Only === */
2902 		gfx_v9_0_update_3d_clock_gating(adev, enable);
2903 		/* ===  CGCG + CGLS === */
2904 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
2905 	} else {
2906 		/* CGCG/CGLS should be disabled before MGCG/MGLS
2907 		 * ===  CGCG + CGLS ===
2908 		 */
2909 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
2910 		/* ===  CGCG /CGLS for GFX 3D Only === */
2911 		gfx_v9_0_update_3d_clock_gating(adev, enable);
2912 		/* ===  MGCG + MGLS === */
2913 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
2914 	}
2915 	return 0;
2916 }
2917 
2918 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
2919 	.enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode,
2920 	.exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode
2921 };
2922 
2923 static int gfx_v9_0_set_powergating_state(void *handle,
2924 					  enum amd_powergating_state state)
2925 {
2926 	return 0;
2927 }
2928 
2929 static int gfx_v9_0_set_clockgating_state(void *handle,
2930 					  enum amd_clockgating_state state)
2931 {
2932 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2933 
2934 	switch (adev->asic_type) {
2935 	case CHIP_VEGA10:
2936 		gfx_v9_0_update_gfx_clock_gating(adev,
2937 						 state == AMD_CG_STATE_GATE ? true : false);
2938 		break;
2939 	default:
2940 		break;
2941 	}
2942 	return 0;
2943 }
2944 
2945 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
2946 {
2947 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2948 	int data;
2949 
2950 	if (amdgpu_sriov_vf(adev))
2951 		*flags = 0;
2952 
2953 	/* AMD_CG_SUPPORT_GFX_MGCG */
2954 	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
2955 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
2956 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
2957 
2958 	/* AMD_CG_SUPPORT_GFX_CGCG */
2959 	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
2960 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
2961 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
2962 
2963 	/* AMD_CG_SUPPORT_GFX_CGLS */
2964 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
2965 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
2966 
2967 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
2968 	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
2969 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
2970 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
2971 
2972 	/* AMD_CG_SUPPORT_GFX_CP_LS */
2973 	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
2974 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
2975 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
2976 
2977 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
2978 	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
2979 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
2980 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
2981 
2982 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
2983 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
2984 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
2985 }
2986 
2987 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
2988 {
2989 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
2990 }
2991 
2992 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
2993 {
2994 	struct amdgpu_device *adev = ring->adev;
2995 	u64 wptr;
2996 
2997 	/* XXX check if swapping is necessary on BE */
2998 	if (ring->use_doorbell) {
2999 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
3000 	} else {
3001 		wptr = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR));
3002 		wptr += (u64)RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI)) << 32;
3003 	}
3004 
3005 	return wptr;
3006 }
3007 
3008 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
3009 {
3010 	struct amdgpu_device *adev = ring->adev;
3011 
3012 	if (ring->use_doorbell) {
3013 		/* XXX check if swapping is necessary on BE */
3014 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
3015 		WDOORBELL64(ring->doorbell_index, ring->wptr);
3016 	} else {
3017 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR), lower_32_bits(ring->wptr));
3018 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR_HI), upper_32_bits(ring->wptr));
3019 	}
3020 }
3021 
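/* Emit an HDP flush: request the flush through the NBIO register pair and
 * wait for the matching done bit with a WAIT_REG_MEM.
 */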
3022 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
3023 {
3024 	u32 ref_and_mask, reg_mem_engine;
3025 	struct nbio_hdp_flush_reg *nbio_hf_reg;
3026 
3027 	if (ring->adev->asic_type == CHIP_VEGA10)
3028 		nbio_hf_reg = &nbio_v6_1_hdp_flush_reg;
3029 
3030 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3031 		switch (ring->me) {
3032 		case 1:
3033 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
3034 			break;
3035 		case 2:
3036 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
3037 			break;
3038 		default:
3039 			return;
3040 		}
3041 		reg_mem_engine = 0;
3042 	} else {
3043 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
3044 		reg_mem_engine = 1; /* pfp */
3045 	}
3046 
3047 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
3048 			      nbio_hf_reg->hdp_flush_req_offset,
3049 			      nbio_hf_reg->hdp_flush_done_offset,
3050 			      ref_and_mask, ref_and_mask, 0x20);
3051 }
3052 
3053 static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
3054 {
3055 	gfx_v9_0_write_data_to_reg(ring, 0, true,
3056 				   SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 1);
3057 }
3058 
3059 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
3060                                       struct amdgpu_ib *ib,
3061                                       unsigned vm_id, bool ctx_switch)
3062 {
3063 	u32 header, control = 0;
3064 
3065 	if (ib->flags & AMDGPU_IB_FLAG_CE)
3066 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3067 	else
3068 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3069 
3070 	control |= ib->length_dw | (vm_id << 24);
3071 
3072 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT))
3073 		control |= INDIRECT_BUFFER_PRE_ENB(1);
3074 
3075 	amdgpu_ring_write(ring, header);
3076 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
3077 	amdgpu_ring_write(ring,
3078 #ifdef __BIG_ENDIAN
3079 			  (2 << 0) |
3080 #endif
3081 			  lower_32_bits(ib->gpu_addr));
3082 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
3083 	amdgpu_ring_write(ring, control);
3084 }
3085 
3086 #define	INDIRECT_BUFFER_VALID                   (1 << 23)
3087 
static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  lower_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, control);
}
3104 
3105 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
3106 				     u64 seq, unsigned flags)
3107 {
3108 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
3109 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
3110 
3111 	/* RELEASE_MEM - flush caches, send int */
3112 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
3113 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
3114 				 EOP_TC_ACTION_EN |
3115 				 EOP_TC_WB_ACTION_EN |
3116 				 EOP_TC_MD_ACTION_EN |
3117 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3118 				 EVENT_INDEX(5)));
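	/* DATA_SEL: 1 = write the low 32 bits of seq, 2 = write all 64 bits;
	 * INT_SEL: 2 = raise the EOP interrupt once the data write confirms
	 */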
3119 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
3120 
	/*
	 * the address must be Qword aligned for a 64bit write, and Dword
	 * aligned when only the low 32 bits are written (high bits discarded)
	 */
3125 	if (write64bit)
3126 		BUG_ON(addr & 0x7);
3127 	else
3128 		BUG_ON(addr & 0x3);
3129 	amdgpu_ring_write(ring, lower_32_bits(addr));
3130 	amdgpu_ring_write(ring, upper_32_bits(addr));
3131 	amdgpu_ring_write(ring, lower_32_bits(seq));
3132 	amdgpu_ring_write(ring, upper_32_bits(seq));
3133 	amdgpu_ring_write(ring, 0);
3134 }
3135 
3136 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
3137 {
3138 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3139 	uint32_t seq = ring->fence_drv.sync_seq;
3140 	uint64_t addr = ring->fence_drv.gpu_addr;
3141 
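	/* poll the fence writeback address until the sequence number of the
	 * last dependency shows up; on gfx the wait runs on the PFP so that
	 * command fetch stalls as well
	 */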
3142 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
3143 			      lower_32_bits(addr), upper_32_bits(addr),
3144 			      seq, 0xffffffff, 4);
3145 }
3146 
3147 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3148 					unsigned vm_id, uint64_t pd_addr)
3149 {
3150 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
3151 	unsigned eng = ring->idx;
3152 	unsigned i;
3153 
3154 	pd_addr = pd_addr | 0x1; /* valid bit */
	/* for now only the physical base address of the PDE and the valid bit are used */
3156 	BUG_ON(pd_addr & 0xFFFF00000000003EULL);
3157 
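	/* program the new page directory base and trigger an invalidation on
	 * every VM hub (GFXHUB and MMHUB) so all clients see the new mapping
	 */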
3158 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
3159 		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
3160 		uint32_t req = hub->get_invalidate_req(vm_id);
3161 
3162 		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
3163 					   hub->ctx0_ptb_addr_lo32
3164 					   + (2 * vm_id),
3165 					   lower_32_bits(pd_addr));
3166 
3167 		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
3168 					   hub->ctx0_ptb_addr_hi32
3169 					   + (2 * vm_id),
3170 					   upper_32_bits(pd_addr));
3171 
3172 		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
3173 					   hub->vm_inv_eng0_req + eng, req);
3174 
3175 		/* wait for the invalidate to complete */
3176 		gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
3177 				      eng, 0, 1 << vm_id, 1 << vm_id, 0x20);
3178 	}
3179 
3180 	/* compute doesn't have PFP */
3181 	if (usepfp) {
3182 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
3183 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3184 		amdgpu_ring_write(ring, 0x0);
3185 	}
3186 }
3187 
3188 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
3189 {
3190 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
3191 }
3192 
3193 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3194 {
3195 	u64 wptr;
3196 
3197 	/* XXX check if swapping is necessary on BE */
3198 	if (ring->use_doorbell)
3199 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
3200 	else
3201 		BUG();
3202 	return wptr;
3203 }
3204 
3205 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
3206 {
3207 	struct amdgpu_device *adev = ring->adev;
3208 
3209 	/* XXX check if swapping is necessary on BE */
3210 	if (ring->use_doorbell) {
		atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
		WDOORBELL64(ring->doorbell_index, ring->wptr);
	} else {
3214 		BUG(); /* only DOORBELL method supported on gfx9 now */
3215 	}
3216 }
3217 
3218 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
3219 					 u64 seq, unsigned int flags)
3220 {
	/* only 32 bits of writeback space are allocated for each fence seq */
3222 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
3223 
3224 	/* write fence seq to the "addr" */
3225 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3226 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3227 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
3228 	amdgpu_ring_write(ring, lower_32_bits(addr));
3229 	amdgpu_ring_write(ring, upper_32_bits(addr));
3230 	amdgpu_ring_write(ring, lower_32_bits(seq));
3231 
3232 	if (flags & AMDGPU_FENCE_FLAG_INT) {
3233 		/* set register to trigger INT */
3234 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3235 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3236 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
3237 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
3238 		amdgpu_ring_write(ring, 0);
3239 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
3240 	}
3241 }
3242 
3243 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
3244 {
3245 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3246 	amdgpu_ring_write(ring, 0);
3247 }
3248 
3249 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
3250 {
3251 	static struct v9_ce_ib_state ce_payload = {0};
3252 	uint64_t csa_addr;
3253 	int cnt;
3254 
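	/* under SR-IOV the CE metadata is written into the context save area
	 * (CSA) inside the reserved GPU VA range, so CE state can be saved
	 * and restored across preemption
	 */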
3255 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
3256 	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
3257 
3258 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
3259 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
3260 				 WRITE_DATA_DST_SEL(8) |
3261 				 WR_CONFIRM) |
3262 				 WRITE_DATA_CACHE_POLICY(0));
3263 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
3264 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
3265 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
3266 }
3267 
3268 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
3269 {
3270 	static struct v9_de_ib_state de_payload = {0};
3271 	uint64_t csa_addr, gds_addr;
3272 	int cnt;
3273 
3274 	csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
3275 	gds_addr = csa_addr + 4096;
3276 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
3277 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
3278 
3279 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
3280 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
3281 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
3282 				 WRITE_DATA_DST_SEL(8) |
3283 				 WR_CONFIRM) |
3284 				 WRITE_DATA_CACHE_POLICY(0));
3285 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
3286 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
3287 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
3288 }
3289 
3290 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
3291 {
3292 	uint32_t dw2 = 0;
3293 
3294 	if (amdgpu_sriov_vf(ring->adev))
3295 		gfx_v9_0_ring_emit_ce_meta(ring);
3296 
	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
3298 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
3299 		/* set load_global_config & load_global_uconfig */
3300 		dw2 |= 0x8001;
3301 		/* set load_cs_sh_regs */
3302 		dw2 |= 0x01000000;
3303 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
3304 		dw2 |= 0x10002;
3305 
		/* set load_ce_ram if a preamble is present */
3307 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
3308 			dw2 |= 0x10000000;
3309 	} else {
		/* still load_ce_ram if a preamble is presented for the first
		 * time, even though no context switch happens.
		 */
3313 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
3314 			dw2 |= 0x10000000;
3315 	}
3316 
3317 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3318 	amdgpu_ring_write(ring, dw2);
3319 	amdgpu_ring_write(ring, 0);
3320 
3321 	if (amdgpu_sriov_vf(ring->adev))
3322 		gfx_v9_0_ring_emit_de_meta(ring);
3323 }
3324 
3325 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
3326 {
3327 	unsigned ret;
3328 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
3329 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
3330 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr == 0 */
3332 	ret = ring->wptr & ring->buf_mask;
3333 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
3334 	return ret;
3335 }
3336 
3337 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
3338 {
3339 	unsigned cur;
3340 	BUG_ON(offset > ring->buf_mask);
3341 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
3342 
3343 	cur = (ring->wptr & ring->buf_mask) - 1;
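	/* if the write pointer has wrapped since the COND_EXEC was emitted,
	 * the skip count must also cover the dwords from the patch location
	 * to the end of the ring buffer
	 */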
3344 	if (likely(cur > offset))
3345 		ring->ring[offset] = cur - offset;
3346 	else
3347 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
3348 }
3349 
3350 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
3351 {
3352 	struct amdgpu_device *adev = ring->adev;
3353 
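	/* COPY_DATA the register value into the shared writeback page at
	 * reg_val_offs so the SR-IOV virtualization code can read it back
	 */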
3354 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
3356 				(5 << 8) |	/* dst: memory */
3357 				(1 << 20));	/* write confirm */
3358 	amdgpu_ring_write(ring, reg);
3359 	amdgpu_ring_write(ring, 0);
3360 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
3361 				adev->virt.reg_val_offs * 4));
3362 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
3363 				adev->virt.reg_val_offs * 4));
3364 }
3365 
3366 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
3367 				  uint32_t val)
3368 {
3369 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3370 	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
3371 	amdgpu_ring_write(ring, reg);
3372 	amdgpu_ring_write(ring, 0);
3373 	amdgpu_ring_write(ring, val);
3374 }
3375 
3376 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
3377 						 enum amdgpu_interrupt_state state)
3378 {
3379 	u32 cp_int_cntl;
3380 
3381 	switch (state) {
3382 	case AMDGPU_IRQ_STATE_DISABLE:
3383 		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
3384 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
3385 					    TIME_STAMP_INT_ENABLE, 0);
3386 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
3387 		break;
3388 	case AMDGPU_IRQ_STATE_ENABLE:
3389 		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
3390 		cp_int_cntl =
3391 			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
3392 				      TIME_STAMP_INT_ENABLE, 1);
3393 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
3394 		break;
3395 	default:
3396 		break;
3397 	}
3398 }
3399 
3400 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
3401 						     int me, int pipe,
3402 						     enum amdgpu_interrupt_state state)
3403 {
3404 	u32 mec_int_cntl, mec_int_cntl_reg;
3405 
3406 	/*
3407 	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
3408 	 * handles the setting of interrupts for this specific pipe. All other
3409 	 * pipes' interrupts are set by amdkfd.
3410 	 */
3411 
3412 	if (me == 1) {
3413 		switch (pipe) {
3414 		case 0:
3415 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
3416 			break;
3417 		default:
3418 			DRM_DEBUG("invalid pipe %d\n", pipe);
3419 			return;
3420 		}
3421 	} else {
3422 		DRM_DEBUG("invalid me %d\n", me);
3423 		return;
3424 	}
3425 
3426 	switch (state) {
3427 	case AMDGPU_IRQ_STATE_DISABLE:
3428 		mec_int_cntl = RREG32(mec_int_cntl_reg);
3429 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
3430 					     TIME_STAMP_INT_ENABLE, 0);
3431 		WREG32(mec_int_cntl_reg, mec_int_cntl);
3432 		break;
3433 	case AMDGPU_IRQ_STATE_ENABLE:
3434 		mec_int_cntl = RREG32(mec_int_cntl_reg);
3435 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
3436 					     TIME_STAMP_INT_ENABLE, 1);
3437 		WREG32(mec_int_cntl_reg, mec_int_cntl);
3438 		break;
3439 	default:
3440 		break;
3441 	}
3442 }
3443 
3444 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
3445 					     struct amdgpu_irq_src *source,
3446 					     unsigned type,
3447 					     enum amdgpu_interrupt_state state)
3448 {
3449 	u32 cp_int_cntl;
3450 
3451 	switch (state) {
3452 	case AMDGPU_IRQ_STATE_DISABLE:
3453 		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
3454 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
3455 					    PRIV_REG_INT_ENABLE, 0);
3456 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
3457 		break;
3458 	case AMDGPU_IRQ_STATE_ENABLE:
3459 		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
3460 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
3461 					    PRIV_REG_INT_ENABLE, 1);
3462 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
3463 		break;
3464 	default:
3465 		break;
3466 	}
3467 
3468 	return 0;
3469 }
3470 
3471 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
3472 					      struct amdgpu_irq_src *source,
3473 					      unsigned type,
3474 					      enum amdgpu_interrupt_state state)
3475 {
3476 	u32 cp_int_cntl;
3477 
3478 	switch (state) {
3479 	case AMDGPU_IRQ_STATE_DISABLE:
3480 		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
3481 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
3482 					    PRIV_INSTR_INT_ENABLE, 0);
3483 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
3484 		break;
3485 	case AMDGPU_IRQ_STATE_ENABLE:
3486 		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
3487 		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
3488 					    PRIV_INSTR_INT_ENABLE, 1);
3489 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
3490 		break;
3491 	default:
3492 		break;
3493 	}
3494 
3495 	return 0;
3496 }
3497 
3498 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
3499 					    struct amdgpu_irq_src *src,
3500 					    unsigned type,
3501 					    enum amdgpu_interrupt_state state)
3502 {
3503 	switch (type) {
3504 	case AMDGPU_CP_IRQ_GFX_EOP:
3505 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
3506 		break;
3507 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
3508 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
3509 		break;
3510 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
3511 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
3512 		break;
3513 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
3514 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
3515 		break;
3516 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
3517 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
3518 		break;
3519 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
3520 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
3521 		break;
3522 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
3523 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
3524 		break;
3525 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
3526 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
3527 		break;
3528 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
3529 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
3530 		break;
3531 	default:
3532 		break;
3533 	}
3534 	return 0;
3535 }
3536 
3537 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
3538 			    struct amdgpu_irq_src *source,
3539 			    struct amdgpu_iv_entry *entry)
3540 {
3541 	int i;
3542 	u8 me_id, pipe_id, queue_id;
3543 	struct amdgpu_ring *ring;
3544 
3545 	DRM_DEBUG("IH: CP EOP\n");
3546 	me_id = (entry->ring_id & 0x0c) >> 2;
3547 	pipe_id = (entry->ring_id & 0x03) >> 0;
3548 	queue_id = (entry->ring_id & 0x70) >> 4;
3549 
3550 	switch (me_id) {
3551 	case 0:
3552 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
3553 		break;
3554 	case 1:
3555 	case 2:
3556 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3557 			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
3561 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
3562 				amdgpu_fence_process(ring);
3563 		}
3564 		break;
3565 	}
3566 	return 0;
3567 }
3568 
3569 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
3570 				 struct amdgpu_irq_src *source,
3571 				 struct amdgpu_iv_entry *entry)
3572 {
3573 	DRM_ERROR("Illegal register access in command stream\n");
3574 	schedule_work(&adev->reset_work);
3575 	return 0;
3576 }
3577 
3578 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
3579 				  struct amdgpu_irq_src *source,
3580 				  struct amdgpu_iv_entry *entry)
3581 {
3582 	DRM_ERROR("Illegal instruction in command stream\n");
3583 	schedule_work(&adev->reset_work);
3584 	return 0;
3585 }
3586 
3587 static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
3588 					    struct amdgpu_irq_src *src,
3589 					    unsigned int type,
3590 					    enum amdgpu_interrupt_state state)
3591 {
3592 	uint32_t tmp, target;
3593 	struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
3594 
3595 	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
3596 
3597 	if (ring->me == 1)
3598 		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
3599 	else
3600 		target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL);
3601 	target += ring->pipe;
3602 
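	/* GENERIC2_INT must be enabled both globally in CPC_INT_CNTL and in
	 * the per-pipe CP_ME*_PIPE*_INT_CNTL register the KIQ runs on
	 */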
3603 	switch (type) {
3604 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
3605 		if (state == AMDGPU_IRQ_STATE_DISABLE) {
3606 			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL));
3607 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
3608 						 GENERIC2_INT_ENABLE, 0);
3609 			WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp);
3610 
3611 			tmp = RREG32(target);
3612 			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
3613 						 GENERIC2_INT_ENABLE, 0);
3614 			WREG32(target, tmp);
3615 		} else {
3616 			tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL));
3617 			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
3618 						 GENERIC2_INT_ENABLE, 1);
3619 			WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), tmp);
3620 
3621 			tmp = RREG32(target);
3622 			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
3623 						 GENERIC2_INT_ENABLE, 1);
3624 			WREG32(target, tmp);
3625 		}
3626 		break;
3627 	default:
		BUG(); /* kiq only supports GENERIC2_INT for now */
3629 		break;
3630 	}
3631 	return 0;
3632 }
3633 
3634 static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev,
3635 			    struct amdgpu_irq_src *source,
3636 			    struct amdgpu_iv_entry *entry)
3637 {
3638 	u8 me_id, pipe_id, queue_id;
3639 	struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;
3640 
3641 	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
3642 
3643 	me_id = (entry->ring_id & 0x0c) >> 2;
3644 	pipe_id = (entry->ring_id & 0x03) >> 0;
3645 	queue_id = (entry->ring_id & 0x70) >> 4;
3646 	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
3647 		   me_id, pipe_id, queue_id);
3648 
3649 	amdgpu_fence_process(ring);
3650 	return 0;
3651 }
3652 
3653 const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
3654 	.name = "gfx_v9_0",
3655 	.early_init = gfx_v9_0_early_init,
3656 	.late_init = gfx_v9_0_late_init,
3657 	.sw_init = gfx_v9_0_sw_init,
3658 	.sw_fini = gfx_v9_0_sw_fini,
3659 	.hw_init = gfx_v9_0_hw_init,
3660 	.hw_fini = gfx_v9_0_hw_fini,
3661 	.suspend = gfx_v9_0_suspend,
3662 	.resume = gfx_v9_0_resume,
3663 	.is_idle = gfx_v9_0_is_idle,
3664 	.wait_for_idle = gfx_v9_0_wait_for_idle,
3665 	.soft_reset = gfx_v9_0_soft_reset,
3666 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
3667 	.set_powergating_state = gfx_v9_0_set_powergating_state,
3668 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
3669 };
3670 
3671 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
3672 	.type = AMDGPU_RING_TYPE_GFX,
3673 	.align_mask = 0xff,
3674 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
3675 	.support_64bit_ptrs = true,
3676 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
3677 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
3678 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* a total of 242 dwords maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		46 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
		      * the first COND_EXEC jumps to the place just
		      * prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
3697 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
3698 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
3699 	.emit_fence = gfx_v9_0_ring_emit_fence,
3700 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
3701 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
3702 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
3703 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
3704 	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
3705 	.test_ring = gfx_v9_0_ring_test_ring,
3706 	.test_ib = gfx_v9_0_ring_test_ib,
3707 	.insert_nop = amdgpu_ring_insert_nop,
3708 	.pad_ib = amdgpu_ring_generic_pad_ib,
3709 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
3710 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
3711 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
3712 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
3713 };
3714 
3715 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
3716 	.type = AMDGPU_RING_TYPE_COMPUTE,
3717 	.align_mask = 0xff,
3718 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
3719 	.support_64bit_ptrs = true,
3720 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
3721 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
3722 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
3723 	.emit_frame_size =
3724 		20 + /* gfx_v9_0_ring_emit_gds_switch */
3725 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
3726 		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
3727 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
3728 		64 + /* gfx_v9_0_ring_emit_vm_flush */
3729 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
3730 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
3731 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
3732 	.emit_fence = gfx_v9_0_ring_emit_fence,
3733 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
3734 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
3735 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
3736 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
3737 	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
3738 	.test_ring = gfx_v9_0_ring_test_ring,
3739 	.test_ib = gfx_v9_0_ring_test_ib,
3740 	.insert_nop = amdgpu_ring_insert_nop,
3741 	.pad_ib = amdgpu_ring_generic_pad_ib,
3742 };
3743 
3744 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
3745 	.type = AMDGPU_RING_TYPE_KIQ,
3746 	.align_mask = 0xff,
3747 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
3748 	.support_64bit_ptrs = true,
3749 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
3750 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
3751 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
3752 	.emit_frame_size =
3753 		20 + /* gfx_v9_0_ring_emit_gds_switch */
3754 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
3755 		5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
3756 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
3757 		64 + /* gfx_v9_0_ring_emit_vm_flush */
3758 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
3759 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
3760 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
3761 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
3762 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
3763 	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
3764 	.test_ring = gfx_v9_0_ring_test_ring,
3765 	.test_ib = gfx_v9_0_ring_test_ib,
3766 	.insert_nop = amdgpu_ring_insert_nop,
3767 	.pad_ib = amdgpu_ring_generic_pad_ib,
3768 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
3769 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
3770 };
3771 
3772 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
3773 {
3774 	int i;
3775 
3776 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
3777 
3778 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3779 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
3780 
3781 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
3782 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
3783 }
3784 
3785 static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = {
3786 	.set = gfx_v9_0_kiq_set_interrupt_state,
3787 	.process = gfx_v9_0_kiq_irq,
3788 };
3789 
3790 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
3791 	.set = gfx_v9_0_set_eop_interrupt_state,
3792 	.process = gfx_v9_0_eop_irq,
3793 };
3794 
3795 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
3796 	.set = gfx_v9_0_set_priv_reg_fault_state,
3797 	.process = gfx_v9_0_priv_reg_irq,
3798 };
3799 
3800 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
3801 	.set = gfx_v9_0_set_priv_inst_fault_state,
3802 	.process = gfx_v9_0_priv_inst_irq,
3803 };
3804 
3805 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
3806 {
3807 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
3808 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
3809 
3810 	adev->gfx.priv_reg_irq.num_types = 1;
3811 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
3812 
3813 	adev->gfx.priv_inst_irq.num_types = 1;
3814 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
3815 
3816 	adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
3817 	adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs;
3818 }
3819 
3820 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
3821 {
3822 	switch (adev->asic_type) {
3823 	case CHIP_VEGA10:
3824 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
3825 		break;
3826 	default:
3827 		break;
3828 	}
3829 }
3830 
3831 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
3832 {
	/* init asic gds info */
3834 	adev->gds.mem.total_size = RREG32(SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE));
3835 	adev->gds.gws.total_size = 64;
3836 	adev->gds.oa.total_size = 16;
3837 
3838 	if (adev->gds.mem.total_size == 64 * 1024) {
3839 		adev->gds.mem.gfx_partition_size = 4096;
3840 		adev->gds.mem.cs_partition_size = 4096;
3841 
3842 		adev->gds.gws.gfx_partition_size = 4;
3843 		adev->gds.gws.cs_partition_size = 4;
3844 
3845 		adev->gds.oa.gfx_partition_size = 4;
3846 		adev->gds.oa.cs_partition_size = 1;
3847 	} else {
3848 		adev->gds.mem.gfx_partition_size = 1024;
3849 		adev->gds.mem.cs_partition_size = 1024;
3850 
3851 		adev->gds.gws.gfx_partition_size = 16;
3852 		adev->gds.gws.cs_partition_size = 16;
3853 
3854 		adev->gds.oa.gfx_partition_size = 4;
3855 		adev->gds.oa.cs_partition_size = 4;
3856 	}
3857 }
3858 
3859 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
3860 {
3861 	u32 data, mask;
3862 
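	/* combine the fused-off (CC) and driver/user-disabled (GC_USER) CU
	 * masks, then invert to obtain the CUs that are actually active
	 */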
3863 	data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG));
3864 	data |= RREG32(SOC15_REG_OFFSET(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG));
3865 
3866 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
3867 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
3868 
3869 	mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
3870 
3871 	return (~data) & mask;
3872 }
3873 
3874 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
3875 				 struct amdgpu_cu_info *cu_info)
3876 {
3877 	int i, j, k, counter, active_cu_number = 0;
3878 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
3879 
3880 	if (!adev || !cu_info)
3881 		return -EINVAL;
3882 
3883 	memset(cu_info, 0, sizeof(*cu_info));
3884 
3885 	mutex_lock(&adev->grbm_idx_mutex);
3886 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3887 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3888 			mask = 1;
3889 			ao_bitmap = 0;
3890 			counter = 0;
3891 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
3892 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
3893 			cu_info->bitmap[i][j] = bitmap;
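			/* count the active CUs in this SH and mark up to the
			 * first two of them as always-on (AO)
			 */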
3894 
			for (k = 0; k < 16; k++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
3903 			active_cu_number += counter;
3904 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
3905 		}
3906 	}
3907 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3908 	mutex_unlock(&adev->grbm_idx_mutex);
3909 
3910 	cu_info->number = active_cu_number;
3911 	cu_info->ao_cu_mask = ao_cu_mask;
3912 
3913 	return 0;
3914 }
3915 
3916 static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
3917 {
3918 	int r, j;
3919 	u32 tmp;
3920 	bool use_doorbell = true;
3921 	u64 hqd_gpu_addr;
3922 	u64 mqd_gpu_addr;
3923 	u64 eop_gpu_addr;
3924 	u64 wb_gpu_addr;
3925 	u32 *buf;
3926 	struct v9_mqd *mqd;
3927 	struct amdgpu_device *adev;
3928 
3929 	adev = ring->adev;
3930 	if (ring->mqd_obj == NULL) {
3931 		r = amdgpu_bo_create(adev,
3932 				sizeof(struct v9_mqd),
				PAGE_SIZE, true,
3934 				AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
3935 				NULL, &ring->mqd_obj);
3936 		if (r) {
3937 			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
3938 			return r;
3939 		}
3940 	}
3941 
3942 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3943 	if (unlikely(r != 0)) {
3944 		gfx_v9_0_cp_compute_fini(adev);
3945 		return r;
3946 	}
3947 
3948 	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
3949 				  &mqd_gpu_addr);
3950 	if (r) {
3951 		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
3952 		gfx_v9_0_cp_compute_fini(adev);
3953 		return r;
3954 	}
3955 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
3956 	if (r) {
3957 		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
3958 		gfx_v9_0_cp_compute_fini(adev);
3959 		return r;
3960 	}
3961 
3962 	/* init the mqd struct */
3963 	memset(buf, 0, sizeof(struct v9_mqd));
3964 
3965 	mqd = (struct v9_mqd *)buf;
3966 	mqd->header = 0xC0310800;
3967 	mqd->compute_pipelinestat_enable = 0x00000001;
3968 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3969 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3970 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3971 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3972 	mqd->compute_misc_reserved = 0x00000003;
3973 	mutex_lock(&adev->srbm_mutex);
3974 	soc15_grbm_select(adev, ring->me,
3975 			       ring->pipe,
3976 			       ring->queue, 0);
3977 	/* disable wptr polling */
3978 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL));
3979 	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3980 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL), tmp);
3981 
3982 	/* write the EOP addr */
	BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle the EOP address for other cases */
3984 	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
3985 	eop_gpu_addr >>= 8;
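	/* the EOP base address registers take a 256-byte aligned address,
	 * hence the shift by 8
	 */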
3986 
3987 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR), lower_32_bits(eop_gpu_addr));
3988 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI), upper_32_bits(eop_gpu_addr));
3989 	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
3990 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);
3991 
3992 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3993 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL));
3994 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3995 				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
3996 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_CONTROL), tmp);
3997 
3998 	/* enable doorbell? */
3999 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
4000 	if (use_doorbell)
4001 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4002 	else
4003 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4004 
4005 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), tmp);
4006 	mqd->cp_hqd_pq_doorbell_control = tmp;
4007 
4008 	/* disable the queue if it's active */
4009 	ring->wptr = 0;
4010 	mqd->cp_hqd_dequeue_request = 0;
4011 	mqd->cp_hqd_pq_rptr = 0;
4012 	mqd->cp_hqd_pq_wptr_lo = 0;
4013 	mqd->cp_hqd_pq_wptr_hi = 0;
4014 	if (RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1) {
4015 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), 1);
4016 		for (j = 0; j < adev->usec_timeout; j++) {
4017 			if (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)) & 1))
4018 				break;
4019 			udelay(1);
4020 		}
4021 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), mqd->cp_hqd_dequeue_request);
4022 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR), mqd->cp_hqd_pq_rptr);
4023 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo);
4024 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi);
4025 	}
4026 
4027 	/* set the pointer to the MQD */
4028 	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4029 	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4030 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR), mqd->cp_mqd_base_addr_lo);
4031 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR_HI), mqd->cp_mqd_base_addr_hi);
4032 
4033 	/* set MQD vmid to 0 */
4034 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL));
4035 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4036 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MQD_CONTROL), tmp);
4037 	mqd->cp_mqd_control = tmp;
4038 
	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4040 	hqd_gpu_addr = ring->gpu_addr >> 8;
4041 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4042 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4043 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE), mqd->cp_hqd_pq_base_lo);
4044 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI), mqd->cp_hqd_pq_base_hi);
4045 
4046 	/* set up the HQD, this is similar to CP_RB0_CNTL */
4047 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL));
4048 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4049 		(order_base_2(ring->ring_size / 4) - 1));
4050 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4051 		((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4052 #ifdef __BIG_ENDIAN
4053 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4054 #endif
4055 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4056 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4057 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4058 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4059 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_CONTROL), tmp);
4060 	mqd->cp_hqd_pq_control = tmp;
4061 
	/* set the wb address whether it's enabled or not */
4063 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4064 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4065 	mqd->cp_hqd_pq_rptr_report_addr_hi =
4066 	upper_32_bits(wb_gpu_addr) & 0xffff;
4067 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR),
4068 		mqd->cp_hqd_pq_rptr_report_addr_lo);
4069 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI),
4070 		mqd->cp_hqd_pq_rptr_report_addr_hi);
4071 
4072 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4073 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4074 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4075 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4076 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
4077 		mqd->cp_hqd_pq_wptr_poll_addr_lo);
4078 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
4079 		mqd->cp_hqd_pq_wptr_poll_addr_hi);
4080 
4081 	/* enable the doorbell if requested */
4082 	if (use_doorbell) {
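		/* the MEC only accepts doorbells that fall inside this range;
		 * cover everything from the KIQ doorbell up to MEC ring 7
		 */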
4083 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER),
4084 			(AMDGPU_DOORBELL64_KIQ * 2) << 2);
4085 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER),
4086 			(AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
4087 		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL));
4088 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4089 			DOORBELL_OFFSET, ring->doorbell_index);
4090 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4091 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4092 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4093 		mqd->cp_hqd_pq_doorbell_control = tmp;
4094 
4095 	} else {
4096 		mqd->cp_hqd_pq_doorbell_control = 0;
4097 	}
4098 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
4099 		mqd->cp_hqd_pq_doorbell_control);
4100 
4101 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4102 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), mqd->cp_hqd_pq_wptr_lo);
4103 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), mqd->cp_hqd_pq_wptr_hi);
4104 
4105 	/* set the vmid for the queue */
4106 	mqd->cp_hqd_vmid = 0;
4107 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_VMID), mqd->cp_hqd_vmid);
4108 
4109 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE));
4110 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4111 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PERSISTENT_STATE), tmp);
4112 	mqd->cp_hqd_persistent_state = tmp;
4113 
4114 	/* activate the queue */
4115 	mqd->cp_hqd_active = 1;
4116 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), mqd->cp_hqd_active);
4117 
4118 	soc15_grbm_select(adev, 0, 0, 0, 0);
4119 	mutex_unlock(&adev->srbm_mutex);
4120 
4121 	amdgpu_bo_kunmap(ring->mqd_obj);
4122 	amdgpu_bo_unreserve(ring->mqd_obj);
4123 
4124 	if (use_doorbell) {
4125 		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS));
4126 		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4127 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS), tmp);
4128 	}
4129 
4130 	return 0;
4131 }
4132 
4133 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
4134 {
4135 	.type = AMD_IP_BLOCK_TYPE_GFX,
4136 	.major = 9,
4137 	.minor = 0,
4138 	.rev = 0,
4139 	.funcs = &gfx_v9_0_ip_funcs,
4140 };
4141