xref: /linux/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c (revision ebf68996de0ab250c5d520eb2291ab65643e9a1e)
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29 
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36 
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39 #include "vega10_enum.h"
40 #include "hdp/hdp_4_0_offset.h"
41 
42 #include "soc15.h"
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46 
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48 
49 #include "amdgpu_ras.h"
50 
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55 
56 #define mmPWR_MISC_CNTL_STATUS					0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L
62 
63 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
64 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
65 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
66 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
69 
70 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
71 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
72 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
73 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
76 
77 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
78 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
79 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
80 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
83 
84 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
85 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
86 MODULE_FIRMWARE("amdgpu/raven_me.bin");
87 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
88 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
89 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
90 
91 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
92 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
93 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
98 
99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
101 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
106 
107 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
108 {
109 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
110 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
111 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
112 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
113 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
114 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
115 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
116 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
117 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
118 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
119 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
120 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
121 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
122 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
123 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
124 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
125 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
126 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
127 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
128 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
129 };
130 
131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
132 {
133 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
134 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
135 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
136 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
137 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
138 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
139 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
140 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
141 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
142 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
143 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
144 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
145 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
146 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
147 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
148 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
149 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
150 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
151 };
152 
153 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
154 {
155 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
156 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
157 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
158 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
159 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
160 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
161 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
162 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
163 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
164 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
165 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
166 };
167 
168 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
169 {
170 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
171 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
172 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
173 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
174 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
175 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
176 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
177 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
178 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
179 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
180 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
181 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
182 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
183 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
184 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
185 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
186 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
187 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
188 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
189 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
190 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
191 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
192 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
193 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
194 };
195 
196 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
197 {
198 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
199 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
200 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
201 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
202 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
203 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
204 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
205 };
206 
207 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
208 {
209 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
210 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
211 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
212 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
213 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
214 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
215 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
216 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
217 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
218 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
219 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
220 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
221 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
222 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
223 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
224 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
225 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
226 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
227 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
228 };
229 
230 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
231 {
232 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
233 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
234 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
235 };
236 
237 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
238 {
239 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
240 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
241 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
242 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
243 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
244 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
245 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
246 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
247 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
248 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
249 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
250 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
251 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
252 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
253 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
254 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
255 };
256 
257 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
258 {
259 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
260 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
261 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
262 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
263 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
264 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
265 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
266 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
267 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
268 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
269 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
270 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
271 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
272 };
273 
274 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
275 {
276 	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
277 	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
278 	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
279 	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
280 	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
281 	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
282 	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
283 	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
284 };
285 
286 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
287 {
288 	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
289 	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
290 	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
291 	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
292 	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
293 	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
294 	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
295 	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
296 };
297 
298 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
299 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
300 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
301 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
302 
303 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
304 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
305 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
306 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
307 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
308                                  struct amdgpu_cu_info *cu_info);
309 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
310 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
311 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
312 
313 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
314 {
315 	switch (adev->asic_type) {
316 	case CHIP_VEGA10:
317 		if (!amdgpu_virt_support_skip_setting(adev)) {
318 			soc15_program_register_sequence(adev,
319 							 golden_settings_gc_9_0,
320 							 ARRAY_SIZE(golden_settings_gc_9_0));
321 			soc15_program_register_sequence(adev,
322 							 golden_settings_gc_9_0_vg10,
323 							 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
324 		}
325 		break;
326 	case CHIP_VEGA12:
327 		soc15_program_register_sequence(adev,
328 						golden_settings_gc_9_2_1,
329 						ARRAY_SIZE(golden_settings_gc_9_2_1));
330 		soc15_program_register_sequence(adev,
331 						golden_settings_gc_9_2_1_vg12,
332 						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
333 		break;
334 	case CHIP_VEGA20:
335 		soc15_program_register_sequence(adev,
336 						golden_settings_gc_9_0,
337 						ARRAY_SIZE(golden_settings_gc_9_0));
338 		soc15_program_register_sequence(adev,
339 						golden_settings_gc_9_0_vg20,
340 						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
341 		break;
342 	case CHIP_RAVEN:
343 		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
344 						ARRAY_SIZE(golden_settings_gc_9_1));
345 		if (adev->rev_id >= 8)
346 			soc15_program_register_sequence(adev,
347 							golden_settings_gc_9_1_rv2,
348 							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
349 		else
350 			soc15_program_register_sequence(adev,
351 							golden_settings_gc_9_1_rv1,
352 							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
353 		break;
354 	default:
355 		break;
356 	}
357 
358 	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
359 					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
360 }
361 
362 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
363 {
364 	adev->gfx.scratch.num_reg = 8;
365 	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
366 	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
367 }
368 
369 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
370 				       bool wc, uint32_t reg, uint32_t val)
371 {
372 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
373 	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
374 				WRITE_DATA_DST_SEL(0) |
375 				(wc ? WR_CONFIRM : 0));
376 	amdgpu_ring_write(ring, reg);
377 	amdgpu_ring_write(ring, 0);
378 	amdgpu_ring_write(ring, val);
379 }
380 
381 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
382 				  int mem_space, int opt, uint32_t addr0,
383 				  uint32_t addr1, uint32_t ref, uint32_t mask,
384 				  uint32_t inv)
385 {
386 	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
387 	amdgpu_ring_write(ring,
388 				 /* memory (1) or register (0) */
389 				 (WAIT_REG_MEM_MEM_SPACE(mem_space) |
390 				 WAIT_REG_MEM_OPERATION(opt) | /* wait */
391 				 WAIT_REG_MEM_FUNCTION(3) |  /* equal */
392 				 WAIT_REG_MEM_ENGINE(eng_sel)));
393 
394 	if (mem_space)
395 		BUG_ON(addr0 & 0x3); /* Dword align */
396 	amdgpu_ring_write(ring, addr0);
397 	amdgpu_ring_write(ring, addr1);
398 	amdgpu_ring_write(ring, ref);
399 	amdgpu_ring_write(ring, mask);
400 	amdgpu_ring_write(ring, inv); /* poll interval */
401 }
402 
403 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
404 {
405 	struct amdgpu_device *adev = ring->adev;
406 	uint32_t scratch;
407 	uint32_t tmp = 0;
408 	unsigned i;
409 	int r;
410 
411 	r = amdgpu_gfx_scratch_get(adev, &scratch);
412 	if (r)
413 		return r;
414 
415 	WREG32(scratch, 0xCAFEDEAD);
416 	r = amdgpu_ring_alloc(ring, 3);
417 	if (r)
418 		goto error_free_scratch;
419 
420 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
421 	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
422 	amdgpu_ring_write(ring, 0xDEADBEEF);
423 	amdgpu_ring_commit(ring);
424 
425 	for (i = 0; i < adev->usec_timeout; i++) {
426 		tmp = RREG32(scratch);
427 		if (tmp == 0xDEADBEEF)
428 			break;
429 		udelay(1);
430 	}
431 
432 	if (i >= adev->usec_timeout)
433 		r = -ETIMEDOUT;
434 
435 error_free_scratch:
436 	amdgpu_gfx_scratch_free(adev, scratch);
437 	return r;
438 }
439 
440 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
441 {
442 	struct amdgpu_device *adev = ring->adev;
443 	struct amdgpu_ib ib;
444 	struct dma_fence *f = NULL;
445 
446 	unsigned index;
447 	uint64_t gpu_addr;
448 	uint32_t tmp;
449 	long r;
450 
451 	r = amdgpu_device_wb_get(adev, &index);
452 	if (r)
453 		return r;
454 
455 	gpu_addr = adev->wb.gpu_addr + (index * 4);
456 	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
457 	memset(&ib, 0, sizeof(ib));
458 	r = amdgpu_ib_get(adev, NULL, 16, &ib);
459 	if (r)
460 		goto err1;
461 
462 	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
463 	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
464 	ib.ptr[2] = lower_32_bits(gpu_addr);
465 	ib.ptr[3] = upper_32_bits(gpu_addr);
466 	ib.ptr[4] = 0xDEADBEEF;
467 	ib.length_dw = 5;
468 
469 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
470 	if (r)
471 		goto err2;
472 
473 	r = dma_fence_wait_timeout(f, false, timeout);
474 	if (r == 0) {
475 		r = -ETIMEDOUT;
476 		goto err2;
477 	} else if (r < 0) {
478 		goto err2;
479 	}
480 
481 	tmp = adev->wb.wb[index];
482 	if (tmp == 0xDEADBEEF)
483 		r = 0;
484 	else
485 		r = -EINVAL;
486 
487 err2:
488 	amdgpu_ib_free(adev, &ib, NULL);
489 	dma_fence_put(f);
490 err1:
491 	amdgpu_device_wb_free(adev, index);
492 	return r;
493 }
494 
495 
496 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
497 {
498 	release_firmware(adev->gfx.pfp_fw);
499 	adev->gfx.pfp_fw = NULL;
500 	release_firmware(adev->gfx.me_fw);
501 	adev->gfx.me_fw = NULL;
502 	release_firmware(adev->gfx.ce_fw);
503 	adev->gfx.ce_fw = NULL;
504 	release_firmware(adev->gfx.rlc_fw);
505 	adev->gfx.rlc_fw = NULL;
506 	release_firmware(adev->gfx.mec_fw);
507 	adev->gfx.mec_fw = NULL;
508 	release_firmware(adev->gfx.mec2_fw);
509 	adev->gfx.mec2_fw = NULL;
510 
511 	kfree(adev->gfx.rlc.register_list_format);
512 }
513 
514 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
515 {
516 	const struct rlc_firmware_header_v2_1 *rlc_hdr;
517 
518 	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
519 	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
520 	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
521 	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
522 	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
523 	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
524 	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
525 	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
526 	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
527 	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
528 	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
529 	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
530 	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
531 	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
532 			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
533 }
534 
535 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
536 {
537 	adev->gfx.me_fw_write_wait = false;
538 	adev->gfx.mec_fw_write_wait = false;
539 
540 	switch (adev->asic_type) {
541 	case CHIP_VEGA10:
542 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
543 		    (adev->gfx.me_feature_version >= 42) &&
544 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
545 		    (adev->gfx.pfp_feature_version >= 42))
546 			adev->gfx.me_fw_write_wait = true;
547 
548 		if ((adev->gfx.mec_fw_version >=  0x00000193) &&
549 		    (adev->gfx.mec_feature_version >= 42))
550 			adev->gfx.mec_fw_write_wait = true;
551 		break;
552 	case CHIP_VEGA12:
553 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
554 		    (adev->gfx.me_feature_version >= 44) &&
555 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
556 		    (adev->gfx.pfp_feature_version >= 44))
557 			adev->gfx.me_fw_write_wait = true;
558 
559 		if ((adev->gfx.mec_fw_version >=  0x00000196) &&
560 		    (adev->gfx.mec_feature_version >= 44))
561 			adev->gfx.mec_fw_write_wait = true;
562 		break;
563 	case CHIP_VEGA20:
564 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
565 		    (adev->gfx.me_feature_version >= 44) &&
566 		    (adev->gfx.pfp_fw_version >=  0x000000b2) &&
567 		    (adev->gfx.pfp_feature_version >= 44))
568 			adev->gfx.me_fw_write_wait = true;
569 
570 		if ((adev->gfx.mec_fw_version >=  0x00000197) &&
571 		    (adev->gfx.mec_feature_version >= 44))
572 			adev->gfx.mec_fw_write_wait = true;
573 		break;
574 	case CHIP_RAVEN:
575 		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
576 		    (adev->gfx.me_feature_version >= 42) &&
577 		    (adev->gfx.pfp_fw_version >=  0x000000b1) &&
578 		    (adev->gfx.pfp_feature_version >= 42))
579 			adev->gfx.me_fw_write_wait = true;
580 
581 		if ((adev->gfx.mec_fw_version >=  0x00000192) &&
582 		    (adev->gfx.mec_feature_version >= 42))
583 			adev->gfx.mec_fw_write_wait = true;
584 		break;
585 	default:
586 		break;
587 	}
588 }
589 
590 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
591 {
592 	switch (adev->asic_type) {
593 	case CHIP_VEGA10:
594 	case CHIP_VEGA12:
595 	case CHIP_VEGA20:
596 		break;
597 	case CHIP_RAVEN:
598 		if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
599 			break;
600 		if ((adev->gfx.rlc_fw_version != 106 &&
601 		     adev->gfx.rlc_fw_version < 531) ||
602 		    (adev->gfx.rlc_fw_version == 53815) ||
603 		    (adev->gfx.rlc_feature_version < 1) ||
604 		    !adev->gfx.rlc.is_rlc_v2_1)
605 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
606 		break;
607 	default:
608 		break;
609 	}
610 }
611 
612 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
613 {
614 	const char *chip_name;
615 	char fw_name[30];
616 	int err;
617 	struct amdgpu_firmware_info *info = NULL;
618 	const struct common_firmware_header *header = NULL;
619 	const struct gfx_firmware_header_v1_0 *cp_hdr;
620 	const struct rlc_firmware_header_v2_0 *rlc_hdr;
621 	unsigned int *tmp = NULL;
622 	unsigned int i = 0;
623 	uint16_t version_major;
624 	uint16_t version_minor;
625 	uint32_t smu_version;
626 
627 	DRM_DEBUG("\n");
628 
629 	switch (adev->asic_type) {
630 	case CHIP_VEGA10:
631 		chip_name = "vega10";
632 		break;
633 	case CHIP_VEGA12:
634 		chip_name = "vega12";
635 		break;
636 	case CHIP_VEGA20:
637 		chip_name = "vega20";
638 		break;
639 	case CHIP_RAVEN:
640 		if (adev->rev_id >= 8)
641 			chip_name = "raven2";
642 		else if (adev->pdev->device == 0x15d8)
643 			chip_name = "picasso";
644 		else
645 			chip_name = "raven";
646 		break;
647 	default:
648 		BUG();
649 	}
650 
651 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
652 	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
653 	if (err)
654 		goto out;
655 	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
656 	if (err)
657 		goto out;
658 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
659 	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
660 	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
661 
662 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
663 	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
664 	if (err)
665 		goto out;
666 	err = amdgpu_ucode_validate(adev->gfx.me_fw);
667 	if (err)
668 		goto out;
669 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
670 	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
671 	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
672 
673 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
674 	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
675 	if (err)
676 		goto out;
677 	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
678 	if (err)
679 		goto out;
680 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
681 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
682 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
683 
684 	/*
685 	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
686 	 * instead of picasso_rlc.bin.
687 	 * Judgment method:
688 	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
689 	 *          or revision >= 0xD8 && revision <= 0xDF
690 	 * otherwise is PCO FP5
691 	 */
692 	if (!strcmp(chip_name, "picasso") &&
693 		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
694 		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
695 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
696 	else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
697 		(smu_version >= 0x41e2b))
698 		/**
699 		*SMC is loaded by SBIOS on APU and it's able to get the SMU version directly.
700 		*/
701 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
702 	else
703 		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
704 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
705 	if (err)
706 		goto out;
707 	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
708 	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
709 
710 	version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
711 	version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
712 	if (version_major == 2 && version_minor == 1)
713 		adev->gfx.rlc.is_rlc_v2_1 = true;
714 
715 	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
716 	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
717 	adev->gfx.rlc.save_and_restore_offset =
718 			le32_to_cpu(rlc_hdr->save_and_restore_offset);
719 	adev->gfx.rlc.clear_state_descriptor_offset =
720 			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
721 	adev->gfx.rlc.avail_scratch_ram_locations =
722 			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
723 	adev->gfx.rlc.reg_restore_list_size =
724 			le32_to_cpu(rlc_hdr->reg_restore_list_size);
725 	adev->gfx.rlc.reg_list_format_start =
726 			le32_to_cpu(rlc_hdr->reg_list_format_start);
727 	adev->gfx.rlc.reg_list_format_separate_start =
728 			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
729 	adev->gfx.rlc.starting_offsets_start =
730 			le32_to_cpu(rlc_hdr->starting_offsets_start);
731 	adev->gfx.rlc.reg_list_format_size_bytes =
732 			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
733 	adev->gfx.rlc.reg_list_size_bytes =
734 			le32_to_cpu(rlc_hdr->reg_list_size_bytes);
735 	adev->gfx.rlc.register_list_format =
736 			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
737 				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
738 	if (!adev->gfx.rlc.register_list_format) {
739 		err = -ENOMEM;
740 		goto out;
741 	}
742 
743 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
744 			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
745 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
746 		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);
747 
748 	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
749 
750 	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
751 			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
752 	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
753 		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
754 
755 	if (adev->gfx.rlc.is_rlc_v2_1)
756 		gfx_v9_0_init_rlc_ext_microcode(adev);
757 
758 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
759 	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
760 	if (err)
761 		goto out;
762 	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
763 	if (err)
764 		goto out;
765 	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
766 	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
767 	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
768 
769 
770 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
771 	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
772 	if (!err) {
773 		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
774 		if (err)
775 			goto out;
776 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
777 		adev->gfx.mec2_fw->data;
778 		adev->gfx.mec2_fw_version =
779 		le32_to_cpu(cp_hdr->header.ucode_version);
780 		adev->gfx.mec2_feature_version =
781 		le32_to_cpu(cp_hdr->ucode_feature_version);
782 	} else {
783 		err = 0;
784 		adev->gfx.mec2_fw = NULL;
785 	}
786 
787 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
788 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
789 		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
790 		info->fw = adev->gfx.pfp_fw;
791 		header = (const struct common_firmware_header *)info->fw->data;
792 		adev->firmware.fw_size +=
793 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
794 
795 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
796 		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
797 		info->fw = adev->gfx.me_fw;
798 		header = (const struct common_firmware_header *)info->fw->data;
799 		adev->firmware.fw_size +=
800 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
801 
802 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
803 		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
804 		info->fw = adev->gfx.ce_fw;
805 		header = (const struct common_firmware_header *)info->fw->data;
806 		adev->firmware.fw_size +=
807 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
808 
809 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
810 		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
811 		info->fw = adev->gfx.rlc_fw;
812 		header = (const struct common_firmware_header *)info->fw->data;
813 		adev->firmware.fw_size +=
814 			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
815 
816 		if (adev->gfx.rlc.is_rlc_v2_1 &&
817 		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
818 		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
819 		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
820 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
821 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
822 			info->fw = adev->gfx.rlc_fw;
823 			adev->firmware.fw_size +=
824 				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
825 
826 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
827 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
828 			info->fw = adev->gfx.rlc_fw;
829 			adev->firmware.fw_size +=
830 				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
831 
832 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
833 			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
834 			info->fw = adev->gfx.rlc_fw;
835 			adev->firmware.fw_size +=
836 				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
837 		}
838 
839 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
840 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
841 		info->fw = adev->gfx.mec_fw;
842 		header = (const struct common_firmware_header *)info->fw->data;
843 		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
844 		adev->firmware.fw_size +=
845 			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
846 
847 		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
848 		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
849 		info->fw = adev->gfx.mec_fw;
850 		adev->firmware.fw_size +=
851 			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
852 
853 		if (adev->gfx.mec2_fw) {
854 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
855 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
856 			info->fw = adev->gfx.mec2_fw;
857 			header = (const struct common_firmware_header *)info->fw->data;
858 			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
859 			adev->firmware.fw_size +=
860 				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
861 			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
862 			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
863 			info->fw = adev->gfx.mec2_fw;
864 			adev->firmware.fw_size +=
865 				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
866 		}
867 
868 	}
869 
870 out:
871 	gfx_v9_0_check_if_need_gfxoff(adev);
872 	gfx_v9_0_check_fw_write_wait(adev);
873 	if (err) {
874 		dev_err(adev->dev,
875 			"gfx9: Failed to load firmware \"%s\"\n",
876 			fw_name);
877 		release_firmware(adev->gfx.pfp_fw);
878 		adev->gfx.pfp_fw = NULL;
879 		release_firmware(adev->gfx.me_fw);
880 		adev->gfx.me_fw = NULL;
881 		release_firmware(adev->gfx.ce_fw);
882 		adev->gfx.ce_fw = NULL;
883 		release_firmware(adev->gfx.rlc_fw);
884 		adev->gfx.rlc_fw = NULL;
885 		release_firmware(adev->gfx.mec_fw);
886 		adev->gfx.mec_fw = NULL;
887 		release_firmware(adev->gfx.mec2_fw);
888 		adev->gfx.mec2_fw = NULL;
889 	}
890 	return err;
891 }
892 
893 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
894 {
895 	u32 count = 0;
896 	const struct cs_section_def *sect = NULL;
897 	const struct cs_extent_def *ext = NULL;
898 
899 	/* begin clear state */
900 	count += 2;
901 	/* context control state */
902 	count += 3;
903 
904 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
905 		for (ext = sect->section; ext->extent != NULL; ++ext) {
906 			if (sect->id == SECT_CONTEXT)
907 				count += 2 + ext->reg_count;
908 			else
909 				return 0;
910 		}
911 	}
912 
913 	/* end clear state */
914 	count += 2;
915 	/* clear state */
916 	count += 2;
917 
918 	return count;
919 }
920 
921 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
922 				    volatile u32 *buffer)
923 {
924 	u32 count = 0, i;
925 	const struct cs_section_def *sect = NULL;
926 	const struct cs_extent_def *ext = NULL;
927 
928 	if (adev->gfx.rlc.cs_data == NULL)
929 		return;
930 	if (buffer == NULL)
931 		return;
932 
933 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
934 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
935 
936 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
937 	buffer[count++] = cpu_to_le32(0x80000000);
938 	buffer[count++] = cpu_to_le32(0x80000000);
939 
940 	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
941 		for (ext = sect->section; ext->extent != NULL; ++ext) {
942 			if (sect->id == SECT_CONTEXT) {
943 				buffer[count++] =
944 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
945 				buffer[count++] = cpu_to_le32(ext->reg_index -
946 						PACKET3_SET_CONTEXT_REG_START);
947 				for (i = 0; i < ext->reg_count; i++)
948 					buffer[count++] = cpu_to_le32(ext->extent[i]);
949 			} else {
950 				return;
951 			}
952 		}
953 	}
954 
955 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
956 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
957 
958 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
959 	buffer[count++] = cpu_to_le32(0);
960 }
961 
962 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
963 {
964 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
965 	uint32_t pg_always_on_cu_num = 2;
966 	uint32_t always_on_cu_num;
967 	uint32_t i, j, k;
968 	uint32_t mask, cu_bitmap, counter;
969 
970 	if (adev->flags & AMD_IS_APU)
971 		always_on_cu_num = 4;
972 	else if (adev->asic_type == CHIP_VEGA12)
973 		always_on_cu_num = 8;
974 	else
975 		always_on_cu_num = 12;
976 
977 	mutex_lock(&adev->grbm_idx_mutex);
978 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
979 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
980 			mask = 1;
981 			cu_bitmap = 0;
982 			counter = 0;
983 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
984 
985 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
986 				if (cu_info->bitmap[i][j] & mask) {
987 					if (counter == pg_always_on_cu_num)
988 						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
989 					if (counter < always_on_cu_num)
990 						cu_bitmap |= mask;
991 					else
992 						break;
993 					counter++;
994 				}
995 				mask <<= 1;
996 			}
997 
998 			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
999 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1000 		}
1001 	}
1002 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1003 	mutex_unlock(&adev->grbm_idx_mutex);
1004 }
1005 
1006 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1007 {
1008 	uint32_t data;
1009 
1010 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1011 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1012 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1013 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1014 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1015 
1016 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1017 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1018 
1019 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1020 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1021 
1022 	mutex_lock(&adev->grbm_idx_mutex);
1023 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1024 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1025 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1026 
1027 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1028 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1029 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1030 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1031 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1032 
1033 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1034 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1035 	data &= 0x0000FFFF;
1036 	data |= 0x00C00000;
1037 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1038 
1039 	/*
1040 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1041 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1042 	 */
1043 
1044 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1045 	 * but used for RLC_LB_CNTL configuration */
1046 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1047 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1048 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1049 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1050 	mutex_unlock(&adev->grbm_idx_mutex);
1051 
1052 	gfx_v9_0_init_always_on_cu_mask(adev);
1053 }
1054 
1055 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1056 {
1057 	uint32_t data;
1058 
1059 	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1060 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1061 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1062 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1063 	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1064 
1065 	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1066 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1067 
1068 	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1069 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1070 
1071 	mutex_lock(&adev->grbm_idx_mutex);
1072 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
1073 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1074 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1075 
1076 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
1077 	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1078 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1079 	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1080 	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1081 
1082 	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1083 	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1084 	data &= 0x0000FFFF;
1085 	data |= 0x00C00000;
1086 	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1087 
1088 	/*
1089 	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1090 	 * programmed in gfx_v9_0_init_always_on_cu_mask()
1091 	 */
1092 
1093 	/* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved,
1094 	 * but used for RLC_LB_CNTL configuration */
1095 	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1096 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1097 	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1098 	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1099 	mutex_unlock(&adev->grbm_idx_mutex);
1100 
1101 	gfx_v9_0_init_always_on_cu_mask(adev);
1102 }
1103 
1104 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1105 {
1106 	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1107 }
1108 
1109 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1110 {
1111 	return 5;
1112 }
1113 
1114 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1115 {
1116 	const struct cs_section_def *cs_data;
1117 	int r;
1118 
1119 	adev->gfx.rlc.cs_data = gfx9_cs_data;
1120 
1121 	cs_data = adev->gfx.rlc.cs_data;
1122 
1123 	if (cs_data) {
1124 		/* init clear state block */
1125 		r = amdgpu_gfx_rlc_init_csb(adev);
1126 		if (r)
1127 			return r;
1128 	}
1129 
1130 	if (adev->asic_type == CHIP_RAVEN) {
1131 		/* TODO: double check the cp_table_size for RV */
1132 		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1133 		r = amdgpu_gfx_rlc_init_cpt(adev);
1134 		if (r)
1135 			return r;
1136 	}
1137 
1138 	switch (adev->asic_type) {
1139 	case CHIP_RAVEN:
1140 		gfx_v9_0_init_lbpw(adev);
1141 		break;
1142 	case CHIP_VEGA20:
1143 		gfx_v9_4_init_lbpw(adev);
1144 		break;
1145 	default:
1146 		break;
1147 	}
1148 
1149 	return 0;
1150 }
1151 
1152 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1153 {
1154 	int r;
1155 
1156 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1157 	if (unlikely(r != 0))
1158 		return r;
1159 
1160 	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1161 			AMDGPU_GEM_DOMAIN_VRAM);
1162 	if (!r)
1163 		adev->gfx.rlc.clear_state_gpu_addr =
1164 			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1165 
1166 	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1167 
1168 	return r;
1169 }
1170 
1171 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1172 {
1173 	int r;
1174 
1175 	if (!adev->gfx.rlc.clear_state_obj)
1176 		return;
1177 
1178 	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1179 	if (likely(r == 0)) {
1180 		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1181 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1182 	}
1183 }
1184 
1185 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1186 {
1187 	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1188 	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1189 }
1190 
1191 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1192 {
1193 	int r;
1194 	u32 *hpd;
1195 	const __le32 *fw_data;
1196 	unsigned fw_size;
1197 	u32 *fw;
1198 	size_t mec_hpd_size;
1199 
1200 	const struct gfx_firmware_header_v1_0 *mec_hdr;
1201 
1202 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1203 
1204 	/* take ownership of the relevant compute queues */
1205 	amdgpu_gfx_compute_queue_acquire(adev);
1206 	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1207 
1208 	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1209 				      AMDGPU_GEM_DOMAIN_VRAM,
1210 				      &adev->gfx.mec.hpd_eop_obj,
1211 				      &adev->gfx.mec.hpd_eop_gpu_addr,
1212 				      (void **)&hpd);
1213 	if (r) {
1214 		dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1215 		gfx_v9_0_mec_fini(adev);
1216 		return r;
1217 	}
1218 
1219 	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1220 
1221 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1222 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1223 
1224 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1225 
1226 	fw_data = (const __le32 *)
1227 		(adev->gfx.mec_fw->data +
1228 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1229 	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1230 
1231 	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
1232 				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1233 				      &adev->gfx.mec.mec_fw_obj,
1234 				      &adev->gfx.mec.mec_fw_gpu_addr,
1235 				      (void **)&fw);
1236 	if (r) {
1237 		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1238 		gfx_v9_0_mec_fini(adev);
1239 		return r;
1240 	}
1241 
1242 	memcpy(fw, fw_data, fw_size);
1243 
1244 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1245 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1246 
1247 	return 0;
1248 }
1249 
1250 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1251 {
1252 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1253 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1254 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1255 		(address << SQ_IND_INDEX__INDEX__SHIFT) |
1256 		(SQ_IND_INDEX__FORCE_READ_MASK));
1257 	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1258 }
1259 
1260 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1261 			   uint32_t wave, uint32_t thread,
1262 			   uint32_t regno, uint32_t num, uint32_t *out)
1263 {
1264 	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1265 		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1266 		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1267 		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
1268 		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1269 		(SQ_IND_INDEX__FORCE_READ_MASK) |
1270 		(SQ_IND_INDEX__AUTO_INCR_MASK));
1271 	while (num--)
1272 		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1273 }
1274 
1275 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1276 {
1277 	/* type 1 wave data */
1278 	dst[(*no_fields)++] = 1;
1279 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1280 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1281 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1282 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1283 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1284 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1285 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1286 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1287 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1288 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1289 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1290 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1291 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1292 	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1293 }
1294 
1295 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1296 				     uint32_t wave, uint32_t start,
1297 				     uint32_t size, uint32_t *dst)
1298 {
1299 	wave_read_regs(
1300 		adev, simd, wave, 0,
1301 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1302 }
1303 
1304 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1305 				     uint32_t wave, uint32_t thread,
1306 				     uint32_t start, uint32_t size,
1307 				     uint32_t *dst)
1308 {
1309 	wave_read_regs(
1310 		adev, simd, wave, thread,
1311 		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1312 }
1313 
1314 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1315 				  u32 me, u32 pipe, u32 q)
1316 {
1317 	soc15_grbm_select(adev, me, pipe, q, 0);
1318 }
1319 
1320 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1321 	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1322 	.select_se_sh = &gfx_v9_0_select_se_sh,
1323 	.read_wave_data = &gfx_v9_0_read_wave_data,
1324 	.read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1325 	.read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1326 	.select_me_pipe_q = &gfx_v9_0_select_me_pipe_q
1327 };
1328 
1329 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1330 {
1331 	u32 gb_addr_config;
1332 	int err;
1333 
1334 	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1335 
1336 	switch (adev->asic_type) {
1337 	case CHIP_VEGA10:
1338 		adev->gfx.config.max_hw_contexts = 8;
1339 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1340 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1341 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1342 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1343 		gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1344 		break;
1345 	case CHIP_VEGA12:
1346 		adev->gfx.config.max_hw_contexts = 8;
1347 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1348 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1349 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1350 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1351 		gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1352 		DRM_INFO("fix gfx.config for vega12\n");
1353 		break;
1354 	case CHIP_VEGA20:
1355 		adev->gfx.config.max_hw_contexts = 8;
1356 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1357 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1358 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1359 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1360 		gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1361 		gb_addr_config &= ~0xf3e777ff;
1362 		gb_addr_config |= 0x22014042;
1363 		/* check vbios table if gpu info is not available */
1364 		err = amdgpu_atomfirmware_get_gfx_info(adev);
1365 		if (err)
1366 			return err;
1367 		break;
1368 	case CHIP_RAVEN:
1369 		adev->gfx.config.max_hw_contexts = 8;
1370 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1371 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1372 		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1373 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1374 		if (adev->rev_id >= 8)
1375 			gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1376 		else
1377 			gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1378 		break;
1379 	default:
1380 		BUG();
1381 		break;
1382 	}
1383 
1384 	adev->gfx.config.gb_addr_config = gb_addr_config;
1385 
1386 	adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1387 			REG_GET_FIELD(
1388 					adev->gfx.config.gb_addr_config,
1389 					GB_ADDR_CONFIG,
1390 					NUM_PIPES);
1391 
1392 	adev->gfx.config.max_tile_pipes =
1393 		adev->gfx.config.gb_addr_config_fields.num_pipes;
1394 
1395 	adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1396 			REG_GET_FIELD(
1397 					adev->gfx.config.gb_addr_config,
1398 					GB_ADDR_CONFIG,
1399 					NUM_BANKS);
1400 	adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1401 			REG_GET_FIELD(
1402 					adev->gfx.config.gb_addr_config,
1403 					GB_ADDR_CONFIG,
1404 					MAX_COMPRESSED_FRAGS);
1405 	adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1406 			REG_GET_FIELD(
1407 					adev->gfx.config.gb_addr_config,
1408 					GB_ADDR_CONFIG,
1409 					NUM_RB_PER_SE);
1410 	adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1411 			REG_GET_FIELD(
1412 					adev->gfx.config.gb_addr_config,
1413 					GB_ADDR_CONFIG,
1414 					NUM_SHADER_ENGINES);
1415 	adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1416 			REG_GET_FIELD(
1417 					adev->gfx.config.gb_addr_config,
1418 					GB_ADDR_CONFIG,
1419 					PIPE_INTERLEAVE_SIZE));
1420 
1421 	return 0;
1422 }
1423 
1424 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1425 				   struct amdgpu_ngg_buf *ngg_buf,
1426 				   int size_se,
1427 				   int default_size_se)
1428 {
1429 	int r;
1430 
1431 	if (size_se < 0) {
1432 		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1433 		return -EINVAL;
1434 	}
1435 	size_se = size_se ? size_se : default_size_se;
1436 
1437 	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1438 	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1439 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1440 				    &ngg_buf->bo,
1441 				    &ngg_buf->gpu_addr,
1442 				    NULL);
1443 	if (r) {
1444 		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1445 		return r;
1446 	}
1447 	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1448 
1449 	return r;
1450 }
1451 
1452 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
1453 {
1454 	int i;
1455 
1456 	for (i = 0; i < NGG_BUF_MAX; i++)
1457 		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
1458 				      &adev->gfx.ngg.buf[i].gpu_addr,
1459 				      NULL);
1460 
1461 	memset(&adev->gfx.ngg.buf[0], 0,
1462 			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
1463 
1464 	adev->gfx.ngg.init = false;
1465 
1466 	return 0;
1467 }
1468 
1469 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
1470 {
1471 	int r;
1472 
1473 	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
1474 		return 0;
1475 
1476 	/* GDS reserve memory: 64 bytes alignment */
1477 	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
1478 	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
1479 	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
1480 	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
1481 
1482 	/* Primitive Buffer */
1483 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
1484 				    amdgpu_prim_buf_per_se,
1485 				    64 * 1024);
1486 	if (r) {
1487 		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
1488 		goto err;
1489 	}
1490 
1491 	/* Position Buffer */
1492 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
1493 				    amdgpu_pos_buf_per_se,
1494 				    256 * 1024);
1495 	if (r) {
1496 		dev_err(adev->dev, "Failed to create Position Buffer\n");
1497 		goto err;
1498 	}
1499 
1500 	/* Control Sideband */
1501 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
1502 				    amdgpu_cntl_sb_buf_per_se,
1503 				    256);
1504 	if (r) {
1505 		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
1506 		goto err;
1507 	}
1508 
1509 	/* Parameter Cache, not created by default */
1510 	if (amdgpu_param_buf_per_se <= 0)
1511 		goto out;
1512 
1513 	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
1514 				    amdgpu_param_buf_per_se,
1515 				    512 * 1024);
1516 	if (r) {
1517 		dev_err(adev->dev, "Failed to create Parameter Cache\n");
1518 		goto err;
1519 	}
1520 
1521 out:
1522 	adev->gfx.ngg.init = true;
1523 	return 0;
1524 err:
1525 	gfx_v9_0_ngg_fini(adev);
1526 	return r;
1527 }
1528 
1529 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
1530 {
1531 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
1532 	int r;
1533 	u32 data, base;
1534 
1535 	if (!amdgpu_ngg)
1536 		return 0;
1537 
1538 	/* Program buffer size */
1539 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
1540 			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
1541 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
1542 			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
1543 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
1544 
1545 	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
1546 			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
1547 	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
1548 			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
1549 	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
1550 
1551 	/* Program buffer base address */
1552 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1553 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
1554 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
1555 
1556 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
1557 	data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
1558 	WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
1559 
1560 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1561 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
1562 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
1563 
1564 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
1565 	data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
1566 	WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
1567 
1568 	base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1569 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
1570 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
1571 
1572 	base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
1573 	data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
1574 	WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
1575 
1576 	/* Clear GDS reserved memory */
1577 	r = amdgpu_ring_alloc(ring, 17);
1578 	if (r) {
1579 		DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
1580 			  ring->name, r);
1581 		return r;
1582 	}
1583 
1584 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1585 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
1586 			           (adev->gds.gds_size +
1587 				    adev->gfx.ngg.gds_reserve_size));
1588 
1589 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
1590 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
1591 				PACKET3_DMA_DATA_DST_SEL(1) |
1592 				PACKET3_DMA_DATA_SRC_SEL(2)));
1593 	amdgpu_ring_write(ring, 0);
1594 	amdgpu_ring_write(ring, 0);
1595 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
1596 	amdgpu_ring_write(ring, 0);
1597 	amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
1598 				adev->gfx.ngg.gds_reserve_size);
1599 
1600 	gfx_v9_0_write_data_to_reg(ring, 0, false,
1601 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
1602 
1603 	amdgpu_ring_commit(ring);
1604 
1605 	return 0;
1606 }
1607 
1608 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
1609 				      int mec, int pipe, int queue)
1610 {
1611 	int r;
1612 	unsigned irq_type;
1613 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
1614 
1615 	ring = &adev->gfx.compute_ring[ring_id];
1616 
1617 	/* mec0 is me1 */
1618 	ring->me = mec + 1;
1619 	ring->pipe = pipe;
1620 	ring->queue = queue;
1621 
1622 	ring->ring_obj = NULL;
1623 	ring->use_doorbell = true;
1624 	ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
1625 	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
1626 				+ (ring_id * GFX9_MEC_HPD_SIZE);
1627 	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
1628 
1629 	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
1630 		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
1631 		+ ring->pipe;
1632 
1633 	/* type-2 packets are deprecated on MEC, use type-3 instead */
1634 	r = amdgpu_ring_init(adev, ring, 1024,
1635 			     &adev->gfx.eop_irq, irq_type);
1636 	if (r)
1637 		return r;
1638 
1639 
1640 	return 0;
1641 }
1642 
1643 static int gfx_v9_0_sw_init(void *handle)
1644 {
1645 	int i, j, k, r, ring_id;
1646 	struct amdgpu_ring *ring;
1647 	struct amdgpu_kiq *kiq;
1648 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1649 
1650 	switch (adev->asic_type) {
1651 	case CHIP_VEGA10:
1652 	case CHIP_VEGA12:
1653 	case CHIP_VEGA20:
1654 	case CHIP_RAVEN:
1655 		adev->gfx.mec.num_mec = 2;
1656 		break;
1657 	default:
1658 		adev->gfx.mec.num_mec = 1;
1659 		break;
1660 	}
1661 
1662 	adev->gfx.mec.num_pipe_per_mec = 4;
1663 	adev->gfx.mec.num_queue_per_pipe = 8;
1664 
1665 	/* EOP Event */
1666 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
1667 	if (r)
1668 		return r;
1669 
1670 	/* Privileged reg */
1671 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
1672 			      &adev->gfx.priv_reg_irq);
1673 	if (r)
1674 		return r;
1675 
1676 	/* Privileged inst */
1677 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
1678 			      &adev->gfx.priv_inst_irq);
1679 	if (r)
1680 		return r;
1681 
1682 	/* ECC error */
1683 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
1684 			      &adev->gfx.cp_ecc_error_irq);
1685 	if (r)
1686 		return r;
1687 
1688 	/* FUE error */
1689 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
1690 			      &adev->gfx.cp_ecc_error_irq);
1691 	if (r)
1692 		return r;
1693 
1694 	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1695 
1696 	gfx_v9_0_scratch_init(adev);
1697 
1698 	r = gfx_v9_0_init_microcode(adev);
1699 	if (r) {
1700 		DRM_ERROR("Failed to load gfx firmware!\n");
1701 		return r;
1702 	}
1703 
1704 	r = adev->gfx.rlc.funcs->init(adev);
1705 	if (r) {
1706 		DRM_ERROR("Failed to init rlc BOs!\n");
1707 		return r;
1708 	}
1709 
1710 	r = gfx_v9_0_mec_init(adev);
1711 	if (r) {
1712 		DRM_ERROR("Failed to init MEC BOs!\n");
1713 		return r;
1714 	}
1715 
1716 	/* set up the gfx ring */
1717 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1718 		ring = &adev->gfx.gfx_ring[i];
1719 		ring->ring_obj = NULL;
1720 		if (!i)
1721 			sprintf(ring->name, "gfx");
1722 		else
1723 			sprintf(ring->name, "gfx_%d", i);
1724 		ring->use_doorbell = true;
1725 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
1726 		r = amdgpu_ring_init(adev, ring, 1024,
1727 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
1728 		if (r)
1729 			return r;
1730 	}
1731 
1732 	/* set up the compute queues - allocate horizontally across pipes */
1733 	ring_id = 0;
1734 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
1735 		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
1736 			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
1737 				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
1738 					continue;
1739 
1740 				r = gfx_v9_0_compute_ring_init(adev,
1741 							       ring_id,
1742 							       i, k, j);
1743 				if (r)
1744 					return r;
1745 
1746 				ring_id++;
1747 			}
1748 		}
1749 	}
1750 
1751 	r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
1752 	if (r) {
1753 		DRM_ERROR("Failed to init KIQ BOs!\n");
1754 		return r;
1755 	}
1756 
1757 	kiq = &adev->gfx.kiq;
1758 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
1759 	if (r)
1760 		return r;
1761 
1762 	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
1763 	r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
1764 	if (r)
1765 		return r;
1766 
1767 	adev->gfx.ce_ram_size = 0x8000;
1768 
1769 	r = gfx_v9_0_gpu_early_init(adev);
1770 	if (r)
1771 		return r;
1772 
1773 	r = gfx_v9_0_ngg_init(adev);
1774 	if (r)
1775 		return r;
1776 
1777 	return 0;
1778 }
1779 
1780 
1781 static int gfx_v9_0_sw_fini(void *handle)
1782 {
1783 	int i;
1784 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1785 
1786 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
1787 			adev->gfx.ras_if) {
1788 		struct ras_common_if *ras_if = adev->gfx.ras_if;
1789 		struct ras_ih_if ih_info = {
1790 			.head = *ras_if,
1791 		};
1792 
1793 		amdgpu_ras_debugfs_remove(adev, ras_if);
1794 		amdgpu_ras_sysfs_remove(adev, ras_if);
1795 		amdgpu_ras_interrupt_remove_handler(adev,  &ih_info);
1796 		amdgpu_ras_feature_enable(adev, ras_if, 0);
1797 		kfree(ras_if);
1798 	}
1799 
1800 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1801 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1802 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
1803 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1804 
1805 	amdgpu_gfx_compute_mqd_sw_fini(adev);
1806 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
1807 	amdgpu_gfx_kiq_fini(adev);
1808 
1809 	gfx_v9_0_mec_fini(adev);
1810 	gfx_v9_0_ngg_fini(adev);
1811 	amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1812 	if (adev->asic_type == CHIP_RAVEN) {
1813 		amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
1814 				&adev->gfx.rlc.cp_table_gpu_addr,
1815 				(void **)&adev->gfx.rlc.cp_table_ptr);
1816 	}
1817 	gfx_v9_0_free_microcode(adev);
1818 
1819 	return 0;
1820 }
1821 
1822 
1823 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
1824 {
1825 	/* TODO */
1826 }
1827 
1828 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
1829 {
1830 	u32 data;
1831 
1832 	if (instance == 0xffffffff)
1833 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1834 	else
1835 		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
1836 
1837 	if (se_num == 0xffffffff)
1838 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1839 	else
1840 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1841 
1842 	if (sh_num == 0xffffffff)
1843 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1844 	else
1845 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1846 
1847 	WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
1848 }
1849 
1850 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
1851 {
1852 	u32 data, mask;
1853 
1854 	data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
1855 	data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
1856 
1857 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1858 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1859 
1860 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
1861 					 adev->gfx.config.max_sh_per_se);
1862 
1863 	return (~data) & mask;
1864 }
1865 
1866 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
1867 {
1868 	int i, j;
1869 	u32 data;
1870 	u32 active_rbs = 0;
1871 	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
1872 					adev->gfx.config.max_sh_per_se;
1873 
1874 	mutex_lock(&adev->grbm_idx_mutex);
1875 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1876 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1877 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1878 			data = gfx_v9_0_get_rb_active_bitmap(adev);
1879 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
1880 					       rb_bitmap_width_per_sh);
1881 		}
1882 	}
1883 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1884 	mutex_unlock(&adev->grbm_idx_mutex);
1885 
1886 	adev->gfx.config.backend_enable_mask = active_rbs;
1887 	adev->gfx.config.num_rbs = hweight32(active_rbs);
1888 }
1889 
1890 #define DEFAULT_SH_MEM_BASES	(0x6000)
1891 #define FIRST_COMPUTE_VMID	(8)
1892 #define LAST_COMPUTE_VMID	(16)
1893 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
1894 {
1895 	int i;
1896 	uint32_t sh_mem_config;
1897 	uint32_t sh_mem_bases;
1898 
1899 	/*
1900 	 * Configure apertures:
1901 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
1902 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
1903 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
1904 	 */
1905 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
1906 
1907 	sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
1908 			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
1909 			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
1910 
1911 	mutex_lock(&adev->srbm_mutex);
1912 	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
1913 		soc15_grbm_select(adev, 0, 0, 0, i);
1914 		/* CP and shaders */
1915 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
1916 		WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
1917 	}
1918 	soc15_grbm_select(adev, 0, 0, 0, 0);
1919 	mutex_unlock(&adev->srbm_mutex);
1920 }
1921 
1922 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
1923 {
1924 	u32 tmp;
1925 	int i;
1926 
1927 	WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
1928 
1929 	gfx_v9_0_tiling_mode_table_init(adev);
1930 
1931 	gfx_v9_0_setup_rb(adev);
1932 	gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
1933 	adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
1934 
1935 	/* XXX SH_MEM regs */
1936 	/* where to put LDS, scratch, GPUVM in FSA64 space */
1937 	mutex_lock(&adev->srbm_mutex);
1938 	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
1939 		soc15_grbm_select(adev, 0, 0, 0, i);
1940 		/* CP and shaders */
1941 		if (i == 0) {
1942 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1943 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1944 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1945 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
1946 		} else {
1947 			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
1948 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
1949 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
1950 			tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
1951 				(adev->gmc.private_aperture_start >> 48));
1952 			tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
1953 				(adev->gmc.shared_aperture_start >> 48));
1954 			WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
1955 		}
1956 	}
1957 	soc15_grbm_select(adev, 0, 0, 0, 0);
1958 
1959 	mutex_unlock(&adev->srbm_mutex);
1960 
1961 	gfx_v9_0_init_compute_vmid(adev);
1962 
1963 	mutex_lock(&adev->grbm_idx_mutex);
1964 	/*
1965 	 * making sure that the following register writes will be broadcasted
1966 	 * to all the shaders
1967 	 */
1968 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1969 
1970 	WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE,
1971 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
1972 			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
1973 		   (adev->gfx.config.sc_prim_fifo_size_backend <<
1974 			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
1975 		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
1976 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
1977 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
1978 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
1979 	mutex_unlock(&adev->grbm_idx_mutex);
1980 
1981 }
1982 
1983 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
1984 {
1985 	u32 i, j, k;
1986 	u32 mask;
1987 
1988 	mutex_lock(&adev->grbm_idx_mutex);
1989 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1990 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1991 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1992 			for (k = 0; k < adev->usec_timeout; k++) {
1993 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
1994 					break;
1995 				udelay(1);
1996 			}
1997 			if (k == adev->usec_timeout) {
1998 				gfx_v9_0_select_se_sh(adev, 0xffffffff,
1999 						      0xffffffff, 0xffffffff);
2000 				mutex_unlock(&adev->grbm_idx_mutex);
2001 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
2002 					 i, j);
2003 				return;
2004 			}
2005 		}
2006 	}
2007 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2008 	mutex_unlock(&adev->grbm_idx_mutex);
2009 
2010 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2011 		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2012 		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2013 		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2014 	for (k = 0; k < adev->usec_timeout; k++) {
2015 		if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2016 			break;
2017 		udelay(1);
2018 	}
2019 }
2020 
2021 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2022 					       bool enable)
2023 {
2024 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2025 
2026 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2027 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2028 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2029 	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2030 
2031 	WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2032 }
2033 
2034 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2035 {
2036 	/* csib */
2037 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2038 			adev->gfx.rlc.clear_state_gpu_addr >> 32);
2039 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2040 			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2041 	WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2042 			adev->gfx.rlc.clear_state_size);
2043 }
2044 
2045 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2046 				int indirect_offset,
2047 				int list_size,
2048 				int *unique_indirect_regs,
2049 				int unique_indirect_reg_count,
2050 				int *indirect_start_offsets,
2051 				int *indirect_start_offsets_count,
2052 				int max_start_offsets_count)
2053 {
2054 	int idx;
2055 
2056 	for (; indirect_offset < list_size; indirect_offset++) {
2057 		WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2058 		indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2059 		*indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2060 
2061 		while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2062 			indirect_offset += 2;
2063 
2064 			/* look for the matching indice */
2065 			for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2066 				if (unique_indirect_regs[idx] ==
2067 					register_list_format[indirect_offset] ||
2068 					!unique_indirect_regs[idx])
2069 					break;
2070 			}
2071 
2072 			BUG_ON(idx >= unique_indirect_reg_count);
2073 
2074 			if (!unique_indirect_regs[idx])
2075 				unique_indirect_regs[idx] = register_list_format[indirect_offset];
2076 
2077 			indirect_offset++;
2078 		}
2079 	}
2080 }
2081 
2082 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2083 {
2084 	int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2085 	int unique_indirect_reg_count = 0;
2086 
2087 	int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2088 	int indirect_start_offsets_count = 0;
2089 
2090 	int list_size = 0;
2091 	int i = 0, j = 0;
2092 	u32 tmp = 0;
2093 
2094 	u32 *register_list_format =
2095 		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2096 	if (!register_list_format)
2097 		return -ENOMEM;
2098 	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
2099 		adev->gfx.rlc.reg_list_format_size_bytes);
2100 
2101 	/* setup unique_indirect_regs array and indirect_start_offsets array */
2102 	unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2103 	gfx_v9_1_parse_ind_reg_list(register_list_format,
2104 				    adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2105 				    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2106 				    unique_indirect_regs,
2107 				    unique_indirect_reg_count,
2108 				    indirect_start_offsets,
2109 				    &indirect_start_offsets_count,
2110 				    ARRAY_SIZE(indirect_start_offsets));
2111 
2112 	/* enable auto inc in case it is disabled */
2113 	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2114 	tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2115 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2116 
2117 	/* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2118 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2119 		RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2120 	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2121 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2122 			adev->gfx.rlc.register_restore[i]);
2123 
2124 	/* load indirect register */
2125 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2126 		adev->gfx.rlc.reg_list_format_start);
2127 
2128 	/* direct register portion */
2129 	for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2130 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2131 			register_list_format[i]);
2132 
2133 	/* indirect register portion */
2134 	while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2135 		if (register_list_format[i] == 0xFFFFFFFF) {
2136 			WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2137 			continue;
2138 		}
2139 
2140 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2141 		WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2142 
2143 		for (j = 0; j < unique_indirect_reg_count; j++) {
2144 			if (register_list_format[i] == unique_indirect_regs[j]) {
2145 				WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2146 				break;
2147 			}
2148 		}
2149 
2150 		BUG_ON(j >= unique_indirect_reg_count);
2151 
2152 		i++;
2153 	}
2154 
2155 	/* set save/restore list size */
2156 	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2157 	list_size = list_size >> 1;
2158 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2159 		adev->gfx.rlc.reg_restore_list_size);
2160 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2161 
2162 	/* write the starting offsets to RLC scratch ram */
2163 	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2164 		adev->gfx.rlc.starting_offsets_start);
2165 	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2166 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2167 		       indirect_start_offsets[i]);
2168 
2169 	/* load unique indirect regs*/
2170 	for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2171 		if (unique_indirect_regs[i] != 0) {
2172 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2173 			       + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2174 			       unique_indirect_regs[i] & 0x3FFFF);
2175 
2176 			WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2177 			       + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2178 			       unique_indirect_regs[i] >> 20);
2179 		}
2180 	}
2181 
2182 	kfree(register_list_format);
2183 	return 0;
2184 }
2185 
2186 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2187 {
2188 	WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2189 }
2190 
2191 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2192 					     bool enable)
2193 {
2194 	uint32_t data = 0;
2195 	uint32_t default_data = 0;
2196 
2197 	default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2198 	if (enable == true) {
2199 		/* enable GFXIP control over CGPG */
2200 		data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2201 		if(default_data != data)
2202 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2203 
2204 		/* update status */
2205 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2206 		data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2207 		if(default_data != data)
2208 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2209 	} else {
2210 		/* restore GFXIP control over GCPG */
2211 		data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2212 		if(default_data != data)
2213 			WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2214 	}
2215 }
2216 
2217 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2218 {
2219 	uint32_t data = 0;
2220 
2221 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2222 			      AMD_PG_SUPPORT_GFX_SMG |
2223 			      AMD_PG_SUPPORT_GFX_DMG)) {
2224 		/* init IDLE_POLL_COUNT = 60 */
2225 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2226 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2227 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2228 		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2229 
2230 		/* init RLC PG Delay */
2231 		data = 0;
2232 		data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2233 		data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2234 		data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2235 		data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2236 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2237 
2238 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2239 		data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2240 		data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2241 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2242 
2243 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2244 		data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2245 		data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2246 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2247 
2248 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2249 		data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2250 
2251 		/* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2252 		data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2253 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2254 
2255 		pwr_10_0_gfxip_control_over_cgpg(adev, true);
2256 	}
2257 }
2258 
2259 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2260 						bool enable)
2261 {
2262 	uint32_t data = 0;
2263 	uint32_t default_data = 0;
2264 
2265 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2266 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2267 			     SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2268 			     enable ? 1 : 0);
2269 	if (default_data != data)
2270 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2271 }
2272 
2273 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2274 						bool enable)
2275 {
2276 	uint32_t data = 0;
2277 	uint32_t default_data = 0;
2278 
2279 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2280 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2281 			     SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2282 			     enable ? 1 : 0);
2283 	if(default_data != data)
2284 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2285 }
2286 
2287 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2288 					bool enable)
2289 {
2290 	uint32_t data = 0;
2291 	uint32_t default_data = 0;
2292 
2293 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2294 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2295 			     CP_PG_DISABLE,
2296 			     enable ? 0 : 1);
2297 	if(default_data != data)
2298 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2299 }
2300 
2301 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2302 						bool enable)
2303 {
2304 	uint32_t data, default_data;
2305 
2306 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2307 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2308 			     GFX_POWER_GATING_ENABLE,
2309 			     enable ? 1 : 0);
2310 	if(default_data != data)
2311 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2312 }
2313 
2314 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2315 						bool enable)
2316 {
2317 	uint32_t data, default_data;
2318 
2319 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2320 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2321 			     GFX_PIPELINE_PG_ENABLE,
2322 			     enable ? 1 : 0);
2323 	if(default_data != data)
2324 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2325 
2326 	if (!enable)
2327 		/* read any GFX register to wake up GFX */
2328 		data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2329 }
2330 
2331 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2332 						       bool enable)
2333 {
2334 	uint32_t data, default_data;
2335 
2336 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2337 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2338 			     STATIC_PER_CU_PG_ENABLE,
2339 			     enable ? 1 : 0);
2340 	if(default_data != data)
2341 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2342 }
2343 
2344 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2345 						bool enable)
2346 {
2347 	uint32_t data, default_data;
2348 
2349 	default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2350 	data = REG_SET_FIELD(data, RLC_PG_CNTL,
2351 			     DYN_PER_CU_PG_ENABLE,
2352 			     enable ? 1 : 0);
2353 	if(default_data != data)
2354 		WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2355 }
2356 
2357 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2358 {
2359 	gfx_v9_0_init_csb(adev);
2360 
2361 	/*
2362 	 * Rlc save restore list is workable since v2_1.
2363 	 * And it's needed by gfxoff feature.
2364 	 */
2365 	if (adev->gfx.rlc.is_rlc_v2_1) {
2366 		gfx_v9_1_init_rlc_save_restore_list(adev);
2367 		gfx_v9_0_enable_save_restore_machine(adev);
2368 	}
2369 
2370 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2371 			      AMD_PG_SUPPORT_GFX_SMG |
2372 			      AMD_PG_SUPPORT_GFX_DMG |
2373 			      AMD_PG_SUPPORT_CP |
2374 			      AMD_PG_SUPPORT_GDS |
2375 			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
2376 		WREG32(mmRLC_JUMP_TABLE_RESTORE,
2377 		       adev->gfx.rlc.cp_table_gpu_addr >> 8);
2378 		gfx_v9_0_init_gfx_power_gating(adev);
2379 	}
2380 }
2381 
2382 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2383 {
2384 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2385 	gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2386 	gfx_v9_0_wait_for_rlc_serdes(adev);
2387 }
2388 
2389 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2390 {
2391 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2392 	udelay(50);
2393 	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2394 	udelay(50);
2395 }
2396 
2397 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2398 {
2399 #ifdef AMDGPU_RLC_DEBUG_RETRY
2400 	u32 rlc_ucode_ver;
2401 #endif
2402 
2403 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2404 	udelay(50);
2405 
2406 	/* carrizo do enable cp interrupt after cp inited */
2407 	if (!(adev->flags & AMD_IS_APU)) {
2408 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2409 		udelay(50);
2410 	}
2411 
2412 #ifdef AMDGPU_RLC_DEBUG_RETRY
2413 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
2414 	rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2415 	if(rlc_ucode_ver == 0x108) {
2416 		DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n",
2417 				rlc_ucode_ver, adev->gfx.rlc_fw_version);
2418 		/* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2419 		 * default is 0x9C4 to create a 100us interval */
2420 		WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2421 		/* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2422 		 * to disable the page fault retry interrupts, default is
2423 		 * 0x100 (256) */
2424 		WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2425 	}
2426 #endif
2427 }
2428 
2429 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2430 {
2431 	const struct rlc_firmware_header_v2_0 *hdr;
2432 	const __le32 *fw_data;
2433 	unsigned i, fw_size;
2434 
2435 	if (!adev->gfx.rlc_fw)
2436 		return -EINVAL;
2437 
2438 	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2439 	amdgpu_ucode_print_rlc_hdr(&hdr->header);
2440 
2441 	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2442 			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2443 	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2444 
2445 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
2446 			RLCG_UCODE_LOADING_START_ADDRESS);
2447 	for (i = 0; i < fw_size; i++)
2448 		WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2449 	WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2450 
2451 	return 0;
2452 }
2453 
2454 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
2455 {
2456 	int r;
2457 
2458 	if (amdgpu_sriov_vf(adev)) {
2459 		gfx_v9_0_init_csb(adev);
2460 		return 0;
2461 	}
2462 
2463 	adev->gfx.rlc.funcs->stop(adev);
2464 
2465 	/* disable CG */
2466 	WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
2467 
2468 	gfx_v9_0_init_pg(adev);
2469 
2470 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
2471 		/* legacy rlc firmware loading */
2472 		r = gfx_v9_0_rlc_load_microcode(adev);
2473 		if (r)
2474 			return r;
2475 	}
2476 
2477 	switch (adev->asic_type) {
2478 	case CHIP_RAVEN:
2479 		if (amdgpu_lbpw == 0)
2480 			gfx_v9_0_enable_lbpw(adev, false);
2481 		else
2482 			gfx_v9_0_enable_lbpw(adev, true);
2483 		break;
2484 	case CHIP_VEGA20:
2485 		if (amdgpu_lbpw > 0)
2486 			gfx_v9_0_enable_lbpw(adev, true);
2487 		else
2488 			gfx_v9_0_enable_lbpw(adev, false);
2489 		break;
2490 	default:
2491 		break;
2492 	}
2493 
2494 	adev->gfx.rlc.funcs->start(adev);
2495 
2496 	return 0;
2497 }
2498 
2499 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2500 {
2501 	int i;
2502 	u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
2503 
2504 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
2505 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
2506 	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
2507 	if (!enable) {
2508 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2509 			adev->gfx.gfx_ring[i].sched.ready = false;
2510 	}
2511 	WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
2512 	udelay(50);
2513 }
2514 
2515 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2516 {
2517 	const struct gfx_firmware_header_v1_0 *pfp_hdr;
2518 	const struct gfx_firmware_header_v1_0 *ce_hdr;
2519 	const struct gfx_firmware_header_v1_0 *me_hdr;
2520 	const __le32 *fw_data;
2521 	unsigned i, fw_size;
2522 
2523 	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2524 		return -EINVAL;
2525 
2526 	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2527 		adev->gfx.pfp_fw->data;
2528 	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2529 		adev->gfx.ce_fw->data;
2530 	me_hdr = (const struct gfx_firmware_header_v1_0 *)
2531 		adev->gfx.me_fw->data;
2532 
2533 	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2534 	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2535 	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2536 
2537 	gfx_v9_0_cp_gfx_enable(adev, false);
2538 
2539 	/* PFP */
2540 	fw_data = (const __le32 *)
2541 		(adev->gfx.pfp_fw->data +
2542 		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2543 	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2544 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
2545 	for (i = 0; i < fw_size; i++)
2546 		WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2547 	WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2548 
2549 	/* CE */
2550 	fw_data = (const __le32 *)
2551 		(adev->gfx.ce_fw->data +
2552 		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2553 	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2554 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
2555 	for (i = 0; i < fw_size; i++)
2556 		WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2557 	WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2558 
2559 	/* ME */
2560 	fw_data = (const __le32 *)
2561 		(adev->gfx.me_fw->data +
2562 		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2563 	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2564 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
2565 	for (i = 0; i < fw_size; i++)
2566 		WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2567 	WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2568 
2569 	return 0;
2570 }
2571 
2572 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
2573 {
2574 	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2575 	const struct cs_section_def *sect = NULL;
2576 	const struct cs_extent_def *ext = NULL;
2577 	int r, i, tmp;
2578 
2579 	/* init the CP */
2580 	WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2581 	WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
2582 
2583 	gfx_v9_0_cp_gfx_enable(adev, true);
2584 
2585 	r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
2586 	if (r) {
2587 		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2588 		return r;
2589 	}
2590 
2591 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2592 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2593 
2594 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2595 	amdgpu_ring_write(ring, 0x80000000);
2596 	amdgpu_ring_write(ring, 0x80000000);
2597 
2598 	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
2599 		for (ext = sect->section; ext->extent != NULL; ++ext) {
2600 			if (sect->id == SECT_CONTEXT) {
2601 				amdgpu_ring_write(ring,
2602 				       PACKET3(PACKET3_SET_CONTEXT_REG,
2603 					       ext->reg_count));
2604 				amdgpu_ring_write(ring,
2605 				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2606 				for (i = 0; i < ext->reg_count; i++)
2607 					amdgpu_ring_write(ring, ext->extent[i]);
2608 			}
2609 		}
2610 	}
2611 
2612 	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2613 	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2614 
2615 	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2616 	amdgpu_ring_write(ring, 0);
2617 
2618 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2619 	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2620 	amdgpu_ring_write(ring, 0x8000);
2621 	amdgpu_ring_write(ring, 0x8000);
2622 
2623 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1));
2624 	tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
2625 		(SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
2626 	amdgpu_ring_write(ring, tmp);
2627 	amdgpu_ring_write(ring, 0);
2628 
2629 	amdgpu_ring_commit(ring);
2630 
2631 	return 0;
2632 }
2633 
2634 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
2635 {
2636 	struct amdgpu_ring *ring;
2637 	u32 tmp;
2638 	u32 rb_bufsz;
2639 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
2640 
2641 	/* Set the write pointer delay */
2642 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
2643 
2644 	/* set the RB to use vmid 0 */
2645 	WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
2646 
2647 	/* Set ring buffer size */
2648 	ring = &adev->gfx.gfx_ring[0];
2649 	rb_bufsz = order_base_2(ring->ring_size / 8);
2650 	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2651 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2652 #ifdef __BIG_ENDIAN
2653 	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2654 #endif
2655 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2656 
2657 	/* Initialize the ring buffer's write pointers */
2658 	ring->wptr = 0;
2659 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
2660 	WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
2661 
2662 	/* set the wb address wether it's enabled or not */
2663 	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2664 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2665 	WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
2666 
2667 	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2668 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
2669 	WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
2670 
2671 	mdelay(1);
2672 	WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
2673 
2674 	rb_addr = ring->gpu_addr >> 8;
2675 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
2676 	WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2677 
2678 	tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
2679 	if (ring->use_doorbell) {
2680 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2681 				    DOORBELL_OFFSET, ring->doorbell_index);
2682 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2683 				    DOORBELL_EN, 1);
2684 	} else {
2685 		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
2686 	}
2687 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
2688 
2689 	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2690 			DOORBELL_RANGE_LOWER, ring->doorbell_index);
2691 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2692 
2693 	WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
2694 		       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2695 
2696 
2697 	/* start the ring */
2698 	gfx_v9_0_cp_gfx_start(adev);
2699 	ring->sched.ready = true;
2700 
2701 	return 0;
2702 }
2703 
2704 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2705 {
2706 	int i;
2707 
2708 	if (enable) {
2709 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
2710 	} else {
2711 		WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
2712 			(CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2713 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
2714 			adev->gfx.compute_ring[i].sched.ready = false;
2715 		adev->gfx.kiq.ring.sched.ready = false;
2716 	}
2717 	udelay(50);
2718 }
2719 
2720 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2721 {
2722 	const struct gfx_firmware_header_v1_0 *mec_hdr;
2723 	const __le32 *fw_data;
2724 	unsigned i;
2725 	u32 tmp;
2726 
2727 	if (!adev->gfx.mec_fw)
2728 		return -EINVAL;
2729 
2730 	gfx_v9_0_cp_compute_enable(adev, false);
2731 
2732 	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2733 	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2734 
2735 	fw_data = (const __le32 *)
2736 		(adev->gfx.mec_fw->data +
2737 		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2738 	tmp = 0;
2739 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
2740 	tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
2741 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
2742 
2743 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
2744 		adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
2745 	WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
2746 		upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
2747 
2748 	/* MEC1 */
2749 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2750 			 mec_hdr->jt_offset);
2751 	for (i = 0; i < mec_hdr->jt_size; i++)
2752 		WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
2753 			le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
2754 
2755 	WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
2756 			adev->gfx.mec_fw_version);
2757 	/* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2758 
2759 	return 0;
2760 }
2761 
2762 /* KIQ functions */
2763 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
2764 {
2765 	uint32_t tmp;
2766 	struct amdgpu_device *adev = ring->adev;
2767 
2768 	/* tell RLC which is KIQ queue */
2769 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
2770 	tmp &= 0xffffff00;
2771 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
2772 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2773 	tmp |= 0x80;
2774 	WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
2775 }
2776 
2777 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
2778 {
2779 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
2780 	uint64_t queue_mask = 0;
2781 	int r, i;
2782 
2783 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
2784 		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
2785 			continue;
2786 
2787 		/* This situation may be hit in the future if a new HW
2788 		 * generation exposes more than 64 queues. If so, the
2789 		 * definition of queue_mask needs updating */
2790 		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
2791 			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
2792 			break;
2793 		}
2794 
2795 		queue_mask |= (1ull << i);
2796 	}
2797 
2798 	r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
2799 	if (r) {
2800 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
2801 		return r;
2802 	}
2803 
2804 	/* set resources */
2805 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
2806 	amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
2807 			  PACKET3_SET_RESOURCES_QUEUE_TYPE(0));	/* vmid_mask:0 queue_type:0 (KIQ) */
2808 	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
2809 	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
2810 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
2811 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
2812 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
2813 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
2814 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2815 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2816 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
2817 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2818 
2819 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
2820 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
2821 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
2822 				  PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
2823 				  PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
2824 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
2825 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
2826 				  PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
2827 				  PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
2828 				  PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
2829 				  PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
2830 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
2831 		amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
2832 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
2833 		amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
2834 		amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
2835 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
2836 	}
2837 
2838 	r = amdgpu_ring_test_helper(kiq_ring);
2839 	if (r)
2840 		DRM_ERROR("KCQ enable failed\n");
2841 
2842 	return r;
2843 }
2844 
2845 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
2846 {
2847 	struct amdgpu_device *adev = ring->adev;
2848 	struct v9_mqd *mqd = ring->mqd_ptr;
2849 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
2850 	uint32_t tmp;
2851 
2852 	mqd->header = 0xC0310800;
2853 	mqd->compute_pipelinestat_enable = 0x00000001;
2854 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2855 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2856 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2857 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2858 	mqd->compute_misc_reserved = 0x00000003;
2859 
2860 	mqd->dynamic_cu_mask_addr_lo =
2861 		lower_32_bits(ring->mqd_gpu_addr
2862 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2863 	mqd->dynamic_cu_mask_addr_hi =
2864 		upper_32_bits(ring->mqd_gpu_addr
2865 			      + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
2866 
2867 	eop_base_addr = ring->eop_gpu_addr >> 8;
2868 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
2869 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
2870 
2871 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2872 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
2873 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2874 			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
2875 
2876 	mqd->cp_hqd_eop_control = tmp;
2877 
2878 	/* enable doorbell? */
2879 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2880 
2881 	if (ring->use_doorbell) {
2882 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2883 				    DOORBELL_OFFSET, ring->doorbell_index);
2884 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2885 				    DOORBELL_EN, 1);
2886 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2887 				    DOORBELL_SOURCE, 0);
2888 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2889 				    DOORBELL_HIT, 0);
2890 	} else {
2891 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2892 					 DOORBELL_EN, 0);
2893 	}
2894 
2895 	mqd->cp_hqd_pq_doorbell_control = tmp;
2896 
2897 	/* disable the queue if it's active */
2898 	ring->wptr = 0;
2899 	mqd->cp_hqd_dequeue_request = 0;
2900 	mqd->cp_hqd_pq_rptr = 0;
2901 	mqd->cp_hqd_pq_wptr_lo = 0;
2902 	mqd->cp_hqd_pq_wptr_hi = 0;
2903 
2904 	/* set the pointer to the MQD */
2905 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
2906 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
2907 
2908 	/* set MQD vmid to 0 */
2909 	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
2910 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2911 	mqd->cp_mqd_control = tmp;
2912 
2913 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2914 	hqd_gpu_addr = ring->gpu_addr >> 8;
2915 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2916 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2917 
2918 	/* set up the HQD, this is similar to CP_RB0_CNTL */
2919 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
2920 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2921 			    (order_base_2(ring->ring_size / 4) - 1));
2922 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2923 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2924 #ifdef __BIG_ENDIAN
2925 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
2926 #endif
2927 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
2928 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
2929 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
2930 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
2931 	mqd->cp_hqd_pq_control = tmp;
2932 
2933 	/* set the wb address whether it's enabled or not */
2934 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2935 	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
2936 	mqd->cp_hqd_pq_rptr_report_addr_hi =
2937 		upper_32_bits(wb_gpu_addr) & 0xffff;
2938 
2939 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
2940 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
2941 	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
2942 	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
2943 
2944 	tmp = 0;
2945 	/* enable the doorbell if requested */
2946 	if (ring->use_doorbell) {
2947 		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
2948 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2949 				DOORBELL_OFFSET, ring->doorbell_index);
2950 
2951 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2952 					 DOORBELL_EN, 1);
2953 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2954 					 DOORBELL_SOURCE, 0);
2955 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
2956 					 DOORBELL_HIT, 0);
2957 	}
2958 
2959 	mqd->cp_hqd_pq_doorbell_control = tmp;
2960 
2961 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
2962 	ring->wptr = 0;
2963 	mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
2964 
2965 	/* set the vmid for the queue */
2966 	mqd->cp_hqd_vmid = 0;
2967 
2968 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
2969 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
2970 	mqd->cp_hqd_persistent_state = tmp;
2971 
2972 	/* set MIN_IB_AVAIL_SIZE */
2973 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
2974 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
2975 	mqd->cp_hqd_ib_control = tmp;
2976 
2977 	/* activate the queue */
2978 	mqd->cp_hqd_active = 1;
2979 
2980 	return 0;
2981 }
2982 
2983 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
2984 {
2985 	struct amdgpu_device *adev = ring->adev;
2986 	struct v9_mqd *mqd = ring->mqd_ptr;
2987 	int j;
2988 
2989 	/* disable wptr polling */
2990 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2991 
2992 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
2993 	       mqd->cp_hqd_eop_base_addr_lo);
2994 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
2995 	       mqd->cp_hqd_eop_base_addr_hi);
2996 
2997 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2998 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
2999 	       mqd->cp_hqd_eop_control);
3000 
3001 	/* enable doorbell? */
3002 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3003 	       mqd->cp_hqd_pq_doorbell_control);
3004 
3005 	/* disable the queue if it's active */
3006 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3007 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3008 		for (j = 0; j < adev->usec_timeout; j++) {
3009 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3010 				break;
3011 			udelay(1);
3012 		}
3013 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3014 		       mqd->cp_hqd_dequeue_request);
3015 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3016 		       mqd->cp_hqd_pq_rptr);
3017 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3018 		       mqd->cp_hqd_pq_wptr_lo);
3019 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3020 		       mqd->cp_hqd_pq_wptr_hi);
3021 	}
3022 
3023 	/* set the pointer to the MQD */
3024 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3025 	       mqd->cp_mqd_base_addr_lo);
3026 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3027 	       mqd->cp_mqd_base_addr_hi);
3028 
3029 	/* set MQD vmid to 0 */
3030 	WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3031 	       mqd->cp_mqd_control);
3032 
3033 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3034 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3035 	       mqd->cp_hqd_pq_base_lo);
3036 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3037 	       mqd->cp_hqd_pq_base_hi);
3038 
3039 	/* set up the HQD, this is similar to CP_RB0_CNTL */
3040 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3041 	       mqd->cp_hqd_pq_control);
3042 
3043 	/* set the wb address whether it's enabled or not */
3044 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3045 				mqd->cp_hqd_pq_rptr_report_addr_lo);
3046 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3047 				mqd->cp_hqd_pq_rptr_report_addr_hi);
3048 
3049 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3050 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3051 	       mqd->cp_hqd_pq_wptr_poll_addr_lo);
3052 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3053 	       mqd->cp_hqd_pq_wptr_poll_addr_hi);
3054 
3055 	/* enable the doorbell if requested */
3056 	if (ring->use_doorbell) {
3057 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3058 					(adev->doorbell_index.kiq * 2) << 2);
3059 		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3060 					(adev->doorbell_index.userqueue_end * 2) << 2);
3061 	}
3062 
3063 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3064 	       mqd->cp_hqd_pq_doorbell_control);
3065 
3066 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3067 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3068 	       mqd->cp_hqd_pq_wptr_lo);
3069 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3070 	       mqd->cp_hqd_pq_wptr_hi);
3071 
3072 	/* set the vmid for the queue */
3073 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3074 
3075 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3076 	       mqd->cp_hqd_persistent_state);
3077 
3078 	/* activate the queue */
3079 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3080 	       mqd->cp_hqd_active);
3081 
3082 	if (ring->use_doorbell)
3083 		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3084 
3085 	return 0;
3086 }
3087 
3088 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3089 {
3090 	struct amdgpu_device *adev = ring->adev;
3091 	int j;
3092 
3093 	/* disable the queue if it's active */
3094 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3095 
3096 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3097 
3098 		for (j = 0; j < adev->usec_timeout; j++) {
3099 			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3100 				break;
3101 			udelay(1);
3102 		}
3103 
3104 		if (j == AMDGPU_MAX_USEC_TIMEOUT) {
3105 			DRM_DEBUG("KIQ dequeue request failed.\n");
3106 
3107 			/* Manual disable if dequeue request times out */
3108 			WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3109 		}
3110 
3111 		WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3112 		      0);
3113 	}
3114 
3115 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3116 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3117 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3118 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3119 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3120 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3121 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3122 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3123 
3124 	return 0;
3125 }
3126 
3127 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3128 {
3129 	struct amdgpu_device *adev = ring->adev;
3130 	struct v9_mqd *mqd = ring->mqd_ptr;
3131 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3132 
3133 	gfx_v9_0_kiq_setting(ring);
3134 
3135 	if (adev->in_gpu_reset) { /* for GPU_RESET case */
3136 		/* reset MQD to a clean status */
3137 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3138 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3139 
3140 		/* reset ring buffer */
3141 		ring->wptr = 0;
3142 		amdgpu_ring_clear_ring(ring);
3143 
3144 		mutex_lock(&adev->srbm_mutex);
3145 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3146 		gfx_v9_0_kiq_init_register(ring);
3147 		soc15_grbm_select(adev, 0, 0, 0, 0);
3148 		mutex_unlock(&adev->srbm_mutex);
3149 	} else {
3150 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3151 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3152 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3153 		mutex_lock(&adev->srbm_mutex);
3154 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3155 		gfx_v9_0_mqd_init(ring);
3156 		gfx_v9_0_kiq_init_register(ring);
3157 		soc15_grbm_select(adev, 0, 0, 0, 0);
3158 		mutex_unlock(&adev->srbm_mutex);
3159 
3160 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3161 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3162 	}
3163 
3164 	return 0;
3165 }
3166 
3167 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3168 {
3169 	struct amdgpu_device *adev = ring->adev;
3170 	struct v9_mqd *mqd = ring->mqd_ptr;
3171 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
3172 
3173 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3174 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3175 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3176 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3177 		mutex_lock(&adev->srbm_mutex);
3178 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3179 		gfx_v9_0_mqd_init(ring);
3180 		soc15_grbm_select(adev, 0, 0, 0, 0);
3181 		mutex_unlock(&adev->srbm_mutex);
3182 
3183 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3184 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3185 	} else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3186 		/* reset MQD to a clean status */
3187 		if (adev->gfx.mec.mqd_backup[mqd_idx])
3188 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3189 
3190 		/* reset ring buffer */
3191 		ring->wptr = 0;
3192 		amdgpu_ring_clear_ring(ring);
3193 	} else {
3194 		amdgpu_ring_clear_ring(ring);
3195 	}
3196 
3197 	return 0;
3198 }
3199 
3200 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3201 {
3202 	struct amdgpu_ring *ring;
3203 	int r;
3204 
3205 	ring = &adev->gfx.kiq.ring;
3206 
3207 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
3208 	if (unlikely(r != 0))
3209 		return r;
3210 
3211 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3212 	if (unlikely(r != 0))
3213 		return r;
3214 
3215 	gfx_v9_0_kiq_init_queue(ring);
3216 	amdgpu_bo_kunmap(ring->mqd_obj);
3217 	ring->mqd_ptr = NULL;
3218 	amdgpu_bo_unreserve(ring->mqd_obj);
3219 	ring->sched.ready = true;
3220 	return 0;
3221 }
3222 
3223 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3224 {
3225 	struct amdgpu_ring *ring = NULL;
3226 	int r = 0, i;
3227 
3228 	gfx_v9_0_cp_compute_enable(adev, true);
3229 
3230 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3231 		ring = &adev->gfx.compute_ring[i];
3232 
3233 		r = amdgpu_bo_reserve(ring->mqd_obj, false);
3234 		if (unlikely(r != 0))
3235 			goto done;
3236 		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3237 		if (!r) {
3238 			r = gfx_v9_0_kcq_init_queue(ring);
3239 			amdgpu_bo_kunmap(ring->mqd_obj);
3240 			ring->mqd_ptr = NULL;
3241 		}
3242 		amdgpu_bo_unreserve(ring->mqd_obj);
3243 		if (r)
3244 			goto done;
3245 	}
3246 
3247 	r = gfx_v9_0_kiq_kcq_enable(adev);
3248 done:
3249 	return r;
3250 }
3251 
3252 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3253 {
3254 	int r, i;
3255 	struct amdgpu_ring *ring;
3256 
3257 	if (!(adev->flags & AMD_IS_APU))
3258 		gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3259 
3260 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3261 		/* legacy firmware loading */
3262 		r = gfx_v9_0_cp_gfx_load_microcode(adev);
3263 		if (r)
3264 			return r;
3265 
3266 		r = gfx_v9_0_cp_compute_load_microcode(adev);
3267 		if (r)
3268 			return r;
3269 	}
3270 
3271 	r = gfx_v9_0_kiq_resume(adev);
3272 	if (r)
3273 		return r;
3274 
3275 	r = gfx_v9_0_cp_gfx_resume(adev);
3276 	if (r)
3277 		return r;
3278 
3279 	r = gfx_v9_0_kcq_resume(adev);
3280 	if (r)
3281 		return r;
3282 
3283 	ring = &adev->gfx.gfx_ring[0];
3284 	r = amdgpu_ring_test_helper(ring);
3285 	if (r)
3286 		return r;
3287 
3288 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3289 		ring = &adev->gfx.compute_ring[i];
3290 		amdgpu_ring_test_helper(ring);
3291 	}
3292 
3293 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3294 
3295 	return 0;
3296 }
3297 
3298 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3299 {
3300 	gfx_v9_0_cp_gfx_enable(adev, enable);
3301 	gfx_v9_0_cp_compute_enable(adev, enable);
3302 }
3303 
3304 static int gfx_v9_0_hw_init(void *handle)
3305 {
3306 	int r;
3307 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3308 
3309 	gfx_v9_0_init_golden_registers(adev);
3310 
3311 	gfx_v9_0_constants_init(adev);
3312 
3313 	r = gfx_v9_0_csb_vram_pin(adev);
3314 	if (r)
3315 		return r;
3316 
3317 	r = adev->gfx.rlc.funcs->resume(adev);
3318 	if (r)
3319 		return r;
3320 
3321 	r = gfx_v9_0_cp_resume(adev);
3322 	if (r)
3323 		return r;
3324 
3325 	r = gfx_v9_0_ngg_en(adev);
3326 	if (r)
3327 		return r;
3328 
3329 	return r;
3330 }
3331 
3332 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3333 {
3334 	int r, i;
3335 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3336 
3337 	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3338 	if (r)
3339 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3340 
3341 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3342 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3343 
3344 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3345 		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3346 						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3347 						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3348 						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3349 						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3350 		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3351 		amdgpu_ring_write(kiq_ring, 0);
3352 		amdgpu_ring_write(kiq_ring, 0);
3353 		amdgpu_ring_write(kiq_ring, 0);
3354 	}
3355 	r = amdgpu_ring_test_helper(kiq_ring);
3356 	if (r)
3357 		DRM_ERROR("KCQ disable failed\n");
3358 
3359 	return r;
3360 }
3361 
3362 static int gfx_v9_0_hw_fini(void *handle)
3363 {
3364 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3365 
3366 	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3367 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3368 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3369 
3370 	/* disable KCQ to avoid CPC touch memory not valid anymore */
3371 	gfx_v9_0_kcq_disable(adev);
3372 
3373 	if (amdgpu_sriov_vf(adev)) {
3374 		gfx_v9_0_cp_gfx_enable(adev, false);
3375 		/* must disable polling for SRIOV when hw finished, otherwise
3376 		 * CPC engine may still keep fetching WB address which is already
3377 		 * invalid after sw finished and trigger DMAR reading error in
3378 		 * hypervisor side.
3379 		 */
3380 		WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3381 		return 0;
3382 	}
3383 
3384 	/* Use deinitialize sequence from CAIL when unbinding device from driver,
3385 	 * otherwise KIQ is hanging when binding back
3386 	 */
3387 	if (!adev->in_gpu_reset && !adev->in_suspend) {
3388 		mutex_lock(&adev->srbm_mutex);
3389 		soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3390 				adev->gfx.kiq.ring.pipe,
3391 				adev->gfx.kiq.ring.queue, 0);
3392 		gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3393 		soc15_grbm_select(adev, 0, 0, 0, 0);
3394 		mutex_unlock(&adev->srbm_mutex);
3395 	}
3396 
3397 	gfx_v9_0_cp_enable(adev, false);
3398 	adev->gfx.rlc.funcs->stop(adev);
3399 
3400 	gfx_v9_0_csb_vram_unpin(adev);
3401 
3402 	return 0;
3403 }
3404 
3405 static int gfx_v9_0_suspend(void *handle)
3406 {
3407 	return gfx_v9_0_hw_fini(handle);
3408 }
3409 
3410 static int gfx_v9_0_resume(void *handle)
3411 {
3412 	return gfx_v9_0_hw_init(handle);
3413 }
3414 
3415 static bool gfx_v9_0_is_idle(void *handle)
3416 {
3417 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3418 
3419 	if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3420 				GRBM_STATUS, GUI_ACTIVE))
3421 		return false;
3422 	else
3423 		return true;
3424 }
3425 
3426 static int gfx_v9_0_wait_for_idle(void *handle)
3427 {
3428 	unsigned i;
3429 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3430 
3431 	for (i = 0; i < adev->usec_timeout; i++) {
3432 		if (gfx_v9_0_is_idle(handle))
3433 			return 0;
3434 		udelay(1);
3435 	}
3436 	return -ETIMEDOUT;
3437 }
3438 
3439 static int gfx_v9_0_soft_reset(void *handle)
3440 {
3441 	u32 grbm_soft_reset = 0;
3442 	u32 tmp;
3443 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3444 
3445 	/* GRBM_STATUS */
3446 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
3447 	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3448 		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3449 		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3450 		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3451 		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3452 		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3453 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3454 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3455 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3456 						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3457 	}
3458 
3459 	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3460 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3461 						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3462 	}
3463 
3464 	/* GRBM_STATUS2 */
3465 	tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
3466 	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3467 		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3468 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3469 
3470 
3471 	if (grbm_soft_reset) {
3472 		/* stop the rlc */
3473 		adev->gfx.rlc.funcs->stop(adev);
3474 
3475 		/* Disable GFX parsing/prefetching */
3476 		gfx_v9_0_cp_gfx_enable(adev, false);
3477 
3478 		/* Disable MEC parsing/prefetching */
3479 		gfx_v9_0_cp_compute_enable(adev, false);
3480 
3481 		if (grbm_soft_reset) {
3482 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3483 			tmp |= grbm_soft_reset;
3484 			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3485 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3486 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3487 
3488 			udelay(50);
3489 
3490 			tmp &= ~grbm_soft_reset;
3491 			WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
3492 			tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
3493 		}
3494 
3495 		/* Wait a little for things to settle down */
3496 		udelay(50);
3497 	}
3498 	return 0;
3499 }
3500 
3501 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3502 {
3503 	uint64_t clock;
3504 
3505 	mutex_lock(&adev->gfx.gpu_clock_mutex);
3506 	WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3507 	clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
3508 		((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3509 	mutex_unlock(&adev->gfx.gpu_clock_mutex);
3510 	return clock;
3511 }
3512 
3513 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3514 					  uint32_t vmid,
3515 					  uint32_t gds_base, uint32_t gds_size,
3516 					  uint32_t gws_base, uint32_t gws_size,
3517 					  uint32_t oa_base, uint32_t oa_size)
3518 {
3519 	struct amdgpu_device *adev = ring->adev;
3520 
3521 	/* GDS Base */
3522 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3523 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
3524 				   gds_base);
3525 
3526 	/* GDS Size */
3527 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3528 				   SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
3529 				   gds_size);
3530 
3531 	/* GWS */
3532 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3533 				   SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
3534 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3535 
3536 	/* OA */
3537 	gfx_v9_0_write_data_to_reg(ring, 0, false,
3538 				   SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
3539 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
3540 }
3541 
3542 static const u32 vgpr_init_compute_shader[] =
3543 {
3544 	0xb07c0000, 0xbe8000ff,
3545 	0x000000f8, 0xbf110800,
3546 	0x7e000280, 0x7e020280,
3547 	0x7e040280, 0x7e060280,
3548 	0x7e080280, 0x7e0a0280,
3549 	0x7e0c0280, 0x7e0e0280,
3550 	0x80808800, 0xbe803200,
3551 	0xbf84fff5, 0xbf9c0000,
3552 	0xd28c0001, 0x0001007f,
3553 	0xd28d0001, 0x0002027e,
3554 	0x10020288, 0xb8810904,
3555 	0xb7814000, 0xd1196a01,
3556 	0x00000301, 0xbe800087,
3557 	0xbefc00c1, 0xd89c4000,
3558 	0x00020201, 0xd89cc080,
3559 	0x00040401, 0x320202ff,
3560 	0x00000800, 0x80808100,
3561 	0xbf84fff8, 0x7e020280,
3562 	0xbf810000, 0x00000000,
3563 };
3564 
3565 static const u32 sgpr_init_compute_shader[] =
3566 {
3567 	0xb07c0000, 0xbe8000ff,
3568 	0x0000005f, 0xbee50080,
3569 	0xbe812c65, 0xbe822c65,
3570 	0xbe832c65, 0xbe842c65,
3571 	0xbe852c65, 0xb77c0005,
3572 	0x80808500, 0xbf84fff8,
3573 	0xbe800080, 0xbf810000,
3574 };
3575 
3576 static const struct soc15_reg_entry vgpr_init_regs[] = {
3577    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3578    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3579    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3580    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3581    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3582    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3583    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3584    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3585    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
3586    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
3587 };
3588 
3589 static const struct soc15_reg_entry sgpr_init_regs[] = {
3590    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
3591    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
3592    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
3593    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
3594    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
3595    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
3596    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
3597    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
3598    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */
3599    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
3600 };
3601 
3602 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
3603    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) },
3604    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) },
3605    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) },
3606    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) },
3607    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) },
3608    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) },
3609    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) },
3610    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) },
3611    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) },
3612    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) },
3613    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) },
3614    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) },
3615    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) },
3616    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) },
3617    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) },
3618    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) },
3619    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) },
3620    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) },
3621    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) },
3622    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) },
3623    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) },
3624    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) },
3625    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) },
3626    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) },
3627    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) },
3628    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) },
3629    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) },
3630    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) },
3631    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) },
3632    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) },
3633    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) },
3634 };
3635 
3636 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
3637 {
3638 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
3639 	struct amdgpu_ib ib;
3640 	struct dma_fence *f = NULL;
3641 	int r, i, j;
3642 	unsigned total_size, vgpr_offset, sgpr_offset;
3643 	u64 gpu_addr;
3644 
3645 	/* only support when RAS is enabled */
3646 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
3647 		return 0;
3648 
3649 	/* bail if the compute ring is not ready */
3650 	if (!ring->sched.ready)
3651 		return 0;
3652 
3653 	total_size =
3654 		((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3655 	total_size +=
3656 		((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
3657 	total_size = ALIGN(total_size, 256);
3658 	vgpr_offset = total_size;
3659 	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
3660 	sgpr_offset = total_size;
3661 	total_size += sizeof(sgpr_init_compute_shader);
3662 
3663 	/* allocate an indirect buffer to put the commands in */
3664 	memset(&ib, 0, sizeof(ib));
3665 	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
3666 	if (r) {
3667 		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
3668 		return r;
3669 	}
3670 
3671 	/* load the compute shaders */
3672 	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
3673 		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
3674 
3675 	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
3676 		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
3677 
3678 	/* init the ib length to 0 */
3679 	ib.length_dw = 0;
3680 
3681 	/* VGPR */
3682 	/* write the register state for the compute dispatch */
3683 	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
3684 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3685 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
3686 								- PACKET3_SET_SH_REG_START;
3687 		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
3688 	}
3689 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3690 	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
3691 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3692 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3693 							- PACKET3_SET_SH_REG_START;
3694 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3695 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3696 
3697 	/* write dispatch packet */
3698 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3699 	ib.ptr[ib.length_dw++] = 128; /* x */
3700 	ib.ptr[ib.length_dw++] = 1; /* y */
3701 	ib.ptr[ib.length_dw++] = 1; /* z */
3702 	ib.ptr[ib.length_dw++] =
3703 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3704 
3705 	/* write CS partial flush packet */
3706 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3707 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3708 
3709 	/* SGPR */
3710 	/* write the register state for the compute dispatch */
3711 	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
3712 		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
3713 		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
3714 								- PACKET3_SET_SH_REG_START;
3715 		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
3716 	}
3717 	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
3718 	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
3719 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
3720 	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
3721 							- PACKET3_SET_SH_REG_START;
3722 	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
3723 	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
3724 
3725 	/* write dispatch packet */
3726 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
3727 	ib.ptr[ib.length_dw++] = 128; /* x */
3728 	ib.ptr[ib.length_dw++] = 1; /* y */
3729 	ib.ptr[ib.length_dw++] = 1; /* z */
3730 	ib.ptr[ib.length_dw++] =
3731 		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
3732 
3733 	/* write CS partial flush packet */
3734 	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
3735 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
3736 
3737 	/* shedule the ib on the ring */
3738 	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
3739 	if (r) {
3740 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
3741 		goto fail;
3742 	}
3743 
3744 	/* wait for the GPU to finish processing the IB */
3745 	r = dma_fence_wait(f, false);
3746 	if (r) {
3747 		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
3748 		goto fail;
3749 	}
3750 
3751 	/* read back registers to clear the counters */
3752 	mutex_lock(&adev->grbm_idx_mutex);
3753 	for (j = 0; j < 16; j++) {
3754 		gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
3755 		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3756 			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3757 		gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
3758 		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3759 			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3760 		gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j);
3761 		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3762 			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3763 		gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j);
3764 		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
3765 			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
3766 	}
3767 	WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
3768 	mutex_unlock(&adev->grbm_idx_mutex);
3769 
3770 fail:
3771 	amdgpu_ib_free(adev, &ib, NULL);
3772 	dma_fence_put(f);
3773 
3774 	return r;
3775 }
3776 
3777 static int gfx_v9_0_early_init(void *handle)
3778 {
3779 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3780 
3781 	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
3782 	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
3783 	gfx_v9_0_set_ring_funcs(adev);
3784 	gfx_v9_0_set_irq_funcs(adev);
3785 	gfx_v9_0_set_gds_init(adev);
3786 	gfx_v9_0_set_rlc_funcs(adev);
3787 
3788 	return 0;
3789 }
3790 
3791 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
3792 		struct amdgpu_iv_entry *entry);
3793 
3794 static int gfx_v9_0_ecc_late_init(void *handle)
3795 {
3796 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3797 	struct ras_common_if **ras_if = &adev->gfx.ras_if;
3798 	struct ras_ih_if ih_info = {
3799 		.cb = gfx_v9_0_process_ras_data_cb,
3800 	};
3801 	struct ras_fs_if fs_info = {
3802 		.sysfs_name = "gfx_err_count",
3803 		.debugfs_name = "gfx_err_inject",
3804 	};
3805 	struct ras_common_if ras_block = {
3806 		.block = AMDGPU_RAS_BLOCK__GFX,
3807 		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
3808 		.sub_block_index = 0,
3809 		.name = "gfx",
3810 	};
3811 	int r;
3812 
3813 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
3814 		amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
3815 		return 0;
3816 	}
3817 
3818 	/* requires IBs so do in late init after IB pool is initialized */
3819 	r = gfx_v9_0_do_edc_gpr_workarounds(adev);
3820 	if (r)
3821 		return r;
3822 
3823 	/* handle resume path. */
3824 	if (*ras_if) {
3825 		/* resend ras TA enable cmd during resume.
3826 		 * prepare to handle failure.
3827 		 */
3828 		ih_info.head = **ras_if;
3829 		r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3830 		if (r) {
3831 			if (r == -EAGAIN) {
3832 				/* request a gpu reset. will run again. */
3833 				amdgpu_ras_request_reset_on_boot(adev,
3834 						AMDGPU_RAS_BLOCK__GFX);
3835 				return 0;
3836 			}
3837 			/* fail to enable ras, cleanup all. */
3838 			goto irq;
3839 		}
3840 		/* enable successfully. continue. */
3841 		goto resume;
3842 	}
3843 
3844 	*ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
3845 	if (!*ras_if)
3846 		return -ENOMEM;
3847 
3848 	**ras_if = ras_block;
3849 
3850 	r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
3851 	if (r) {
3852 		if (r == -EAGAIN) {
3853 			amdgpu_ras_request_reset_on_boot(adev,
3854 					AMDGPU_RAS_BLOCK__GFX);
3855 			r = 0;
3856 		}
3857 		goto feature;
3858 	}
3859 
3860 	ih_info.head = **ras_if;
3861 	fs_info.head = **ras_if;
3862 
3863 	r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
3864 	if (r)
3865 		goto interrupt;
3866 
3867 	r = amdgpu_ras_debugfs_create(adev, &fs_info);
3868 	if (r)
3869 		goto debugfs;
3870 
3871 	r = amdgpu_ras_sysfs_create(adev, &fs_info);
3872 	if (r)
3873 		goto sysfs;
3874 resume:
3875 	r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
3876 	if (r)
3877 		goto irq;
3878 
3879 	return 0;
3880 irq:
3881 	amdgpu_ras_sysfs_remove(adev, *ras_if);
3882 sysfs:
3883 	amdgpu_ras_debugfs_remove(adev, *ras_if);
3884 debugfs:
3885 	amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
3886 interrupt:
3887 	amdgpu_ras_feature_enable(adev, *ras_if, 0);
3888 feature:
3889 	kfree(*ras_if);
3890 	*ras_if = NULL;
3891 	return r;
3892 }
3893 
3894 static int gfx_v9_0_late_init(void *handle)
3895 {
3896 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3897 	int r;
3898 
3899 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
3900 	if (r)
3901 		return r;
3902 
3903 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
3904 	if (r)
3905 		return r;
3906 
3907 	r = gfx_v9_0_ecc_late_init(handle);
3908 	if (r)
3909 		return r;
3910 
3911 	return 0;
3912 }
3913 
3914 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
3915 {
3916 	uint32_t rlc_setting;
3917 
3918 	/* if RLC is not enabled, do nothing */
3919 	rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
3920 	if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
3921 		return false;
3922 
3923 	return true;
3924 }
3925 
3926 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
3927 {
3928 	uint32_t data;
3929 	unsigned i;
3930 
3931 	data = RLC_SAFE_MODE__CMD_MASK;
3932 	data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
3933 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3934 
3935 	/* wait for RLC_SAFE_MODE */
3936 	for (i = 0; i < adev->usec_timeout; i++) {
3937 		if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
3938 			break;
3939 		udelay(1);
3940 	}
3941 }
3942 
3943 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
3944 {
3945 	uint32_t data;
3946 
3947 	data = RLC_SAFE_MODE__CMD_MASK;
3948 	WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
3949 }
3950 
3951 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
3952 						bool enable)
3953 {
3954 	amdgpu_gfx_rlc_enter_safe_mode(adev);
3955 
3956 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
3957 		gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
3958 		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
3959 			gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
3960 	} else {
3961 		gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
3962 		gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
3963 	}
3964 
3965 	amdgpu_gfx_rlc_exit_safe_mode(adev);
3966 }
3967 
3968 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
3969 						bool enable)
3970 {
3971 	/* TODO: double check if we need to perform under safe mode */
3972 	/* gfx_v9_0_enter_rlc_safe_mode(adev); */
3973 
3974 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
3975 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
3976 	else
3977 		gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
3978 
3979 	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
3980 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
3981 	else
3982 		gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
3983 
3984 	/* gfx_v9_0_exit_rlc_safe_mode(adev); */
3985 }
3986 
3987 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
3988 						      bool enable)
3989 {
3990 	uint32_t data, def;
3991 
3992 	amdgpu_gfx_rlc_enter_safe_mode(adev);
3993 
3994 	/* It is disabled by HW by default */
3995 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
3996 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
3997 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
3998 
3999 		if (adev->asic_type != CHIP_VEGA12)
4000 			data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4001 
4002 		data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4003 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4004 			  RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4005 
4006 		/* only for Vega10 & Raven1 */
4007 		data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4008 
4009 		if (def != data)
4010 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4011 
4012 		/* MGLS is a global flag to control all MGLS in GFX */
4013 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4014 			/* 2 - RLC memory Light sleep */
4015 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4016 				def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4017 				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4018 				if (def != data)
4019 					WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4020 			}
4021 			/* 3 - CP memory Light sleep */
4022 			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4023 				def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4024 				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4025 				if (def != data)
4026 					WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4027 			}
4028 		}
4029 	} else {
4030 		/* 1 - MGCG_OVERRIDE */
4031 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4032 
4033 		if (adev->asic_type != CHIP_VEGA12)
4034 			data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4035 
4036 		data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4037 			 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4038 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4039 			 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4040 
4041 		if (def != data)
4042 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4043 
4044 		/* 2 - disable MGLS in RLC */
4045 		data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4046 		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4047 			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4048 			WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4049 		}
4050 
4051 		/* 3 - disable MGLS in CP */
4052 		data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4053 		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4054 			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4055 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4056 		}
4057 	}
4058 
4059 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4060 }
4061 
4062 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4063 					   bool enable)
4064 {
4065 	uint32_t data, def;
4066 
4067 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4068 
4069 	/* Enable 3D CGCG/CGLS */
4070 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4071 		/* write cmd to clear cgcg/cgls ov */
4072 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4073 		/* unset CGCG override */
4074 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4075 		/* update CGCG and CGLS override bits */
4076 		if (def != data)
4077 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4078 
4079 		/* enable 3Dcgcg FSM(0x0000363f) */
4080 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4081 
4082 		data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4083 			RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4084 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4085 			data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4086 				RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4087 		if (def != data)
4088 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4089 
4090 		/* set IDLE_POLL_COUNT(0x00900100) */
4091 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4092 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4093 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4094 		if (def != data)
4095 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4096 	} else {
4097 		/* Disable CGCG/CGLS */
4098 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4099 		/* disable cgcg, cgls should be disabled */
4100 		data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4101 			  RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4102 		/* disable cgcg and cgls in FSM */
4103 		if (def != data)
4104 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4105 	}
4106 
4107 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4108 }
4109 
4110 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4111 						      bool enable)
4112 {
4113 	uint32_t def, data;
4114 
4115 	amdgpu_gfx_rlc_enter_safe_mode(adev);
4116 
4117 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4118 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4119 		/* unset CGCG override */
4120 		data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4121 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4122 			data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4123 		else
4124 			data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4125 		/* update CGCG and CGLS override bits */
4126 		if (def != data)
4127 			WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4128 
4129 		/* enable cgcg FSM(0x0000363F) */
4130 		def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4131 
4132 		data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4133 			RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4134 		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4135 			data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4136 				RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4137 		if (def != data)
4138 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4139 
4140 		/* set IDLE_POLL_COUNT(0x00900100) */
4141 		def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4142 		data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4143 			(0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4144 		if (def != data)
4145 			WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4146 	} else {
4147 		def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4148 		/* reset CGCG/CGLS bits */
4149 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4150 		/* disable cgcg and cgls in FSM */
4151 		if (def != data)
4152 			WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4153 	}
4154 
4155 	amdgpu_gfx_rlc_exit_safe_mode(adev);
4156 }
4157 
4158 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4159 					    bool enable)
4160 {
4161 	if (enable) {
4162 		/* CGCG/CGLS should be enabled after MGCG/MGLS
4163 		 * ===  MGCG + MGLS ===
4164 		 */
4165 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4166 		/* ===  CGCG /CGLS for GFX 3D Only === */
4167 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4168 		/* ===  CGCG + CGLS === */
4169 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4170 	} else {
4171 		/* CGCG/CGLS should be disabled before MGCG/MGLS
4172 		 * ===  CGCG + CGLS ===
4173 		 */
4174 		gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4175 		/* ===  CGCG /CGLS for GFX 3D Only === */
4176 		gfx_v9_0_update_3d_clock_gating(adev, enable);
4177 		/* ===  MGCG + MGLS === */
4178 		gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4179 	}
4180 	return 0;
4181 }
4182 
4183 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4184 	.is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4185 	.set_safe_mode = gfx_v9_0_set_safe_mode,
4186 	.unset_safe_mode = gfx_v9_0_unset_safe_mode,
4187 	.init = gfx_v9_0_rlc_init,
4188 	.get_csb_size = gfx_v9_0_get_csb_size,
4189 	.get_csb_buffer = gfx_v9_0_get_csb_buffer,
4190 	.get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4191 	.resume = gfx_v9_0_rlc_resume,
4192 	.stop = gfx_v9_0_rlc_stop,
4193 	.reset = gfx_v9_0_rlc_reset,
4194 	.start = gfx_v9_0_rlc_start
4195 };
4196 
4197 static int gfx_v9_0_set_powergating_state(void *handle,
4198 					  enum amd_powergating_state state)
4199 {
4200 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4201 	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
4202 
4203 	switch (adev->asic_type) {
4204 	case CHIP_RAVEN:
4205 		if (!enable) {
4206 			amdgpu_gfx_off_ctrl(adev, false);
4207 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4208 		}
4209 		if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4210 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4211 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4212 		} else {
4213 			gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4214 			gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4215 		}
4216 
4217 		if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4218 			gfx_v9_0_enable_cp_power_gating(adev, true);
4219 		else
4220 			gfx_v9_0_enable_cp_power_gating(adev, false);
4221 
4222 		/* update gfx cgpg state */
4223 		gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4224 
4225 		/* update mgcg state */
4226 		gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4227 
4228 		if (enable)
4229 			amdgpu_gfx_off_ctrl(adev, true);
4230 		break;
4231 	case CHIP_VEGA12:
4232 		if (!enable) {
4233 			amdgpu_gfx_off_ctrl(adev, false);
4234 			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4235 		} else {
4236 			amdgpu_gfx_off_ctrl(adev, true);
4237 		}
4238 		break;
4239 	default:
4240 		break;
4241 	}
4242 
4243 	return 0;
4244 }
4245 
4246 static int gfx_v9_0_set_clockgating_state(void *handle,
4247 					  enum amd_clockgating_state state)
4248 {
4249 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4250 
4251 	if (amdgpu_sriov_vf(adev))
4252 		return 0;
4253 
4254 	switch (adev->asic_type) {
4255 	case CHIP_VEGA10:
4256 	case CHIP_VEGA12:
4257 	case CHIP_VEGA20:
4258 	case CHIP_RAVEN:
4259 		gfx_v9_0_update_gfx_clock_gating(adev,
4260 						 state == AMD_CG_STATE_GATE ? true : false);
4261 		break;
4262 	default:
4263 		break;
4264 	}
4265 	return 0;
4266 }
4267 
4268 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4269 {
4270 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4271 	int data;
4272 
4273 	if (amdgpu_sriov_vf(adev))
4274 		*flags = 0;
4275 
4276 	/* AMD_CG_SUPPORT_GFX_MGCG */
4277 	data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4278 	if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4279 		*flags |= AMD_CG_SUPPORT_GFX_MGCG;
4280 
4281 	/* AMD_CG_SUPPORT_GFX_CGCG */
4282 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4283 	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4284 		*flags |= AMD_CG_SUPPORT_GFX_CGCG;
4285 
4286 	/* AMD_CG_SUPPORT_GFX_CGLS */
4287 	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4288 		*flags |= AMD_CG_SUPPORT_GFX_CGLS;
4289 
4290 	/* AMD_CG_SUPPORT_GFX_RLC_LS */
4291 	data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4292 	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4293 		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4294 
4295 	/* AMD_CG_SUPPORT_GFX_CP_LS */
4296 	data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4297 	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4298 		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4299 
4300 	/* AMD_CG_SUPPORT_GFX_3D_CGCG */
4301 	data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4302 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4303 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4304 
4305 	/* AMD_CG_SUPPORT_GFX_3D_CGLS */
4306 	if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4307 		*flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4308 }
4309 
4310 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4311 {
4312 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/
4313 }
4314 
4315 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4316 {
4317 	struct amdgpu_device *adev = ring->adev;
4318 	u64 wptr;
4319 
4320 	/* XXX check if swapping is necessary on BE */
4321 	if (ring->use_doorbell) {
4322 		wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4323 	} else {
4324 		wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4325 		wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4326 	}
4327 
4328 	return wptr;
4329 }
4330 
4331 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4332 {
4333 	struct amdgpu_device *adev = ring->adev;
4334 
4335 	if (ring->use_doorbell) {
4336 		/* XXX check if swapping is necessary on BE */
4337 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4338 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4339 	} else {
4340 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4341 		WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4342 	}
4343 }
4344 
4345 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4346 {
4347 	struct amdgpu_device *adev = ring->adev;
4348 	u32 ref_and_mask, reg_mem_engine;
4349 	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4350 
4351 	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4352 		switch (ring->me) {
4353 		case 1:
4354 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4355 			break;
4356 		case 2:
4357 			ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4358 			break;
4359 		default:
4360 			return;
4361 		}
4362 		reg_mem_engine = 0;
4363 	} else {
4364 		ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
4365 		reg_mem_engine = 1; /* pfp */
4366 	}
4367 
4368 	gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
4369 			      adev->nbio_funcs->get_hdp_flush_req_offset(adev),
4370 			      adev->nbio_funcs->get_hdp_flush_done_offset(adev),
4371 			      ref_and_mask, ref_and_mask, 0x20);
4372 }
4373 
4374 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
4375 					struct amdgpu_job *job,
4376 					struct amdgpu_ib *ib,
4377 					uint32_t flags)
4378 {
4379 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4380 	u32 header, control = 0;
4381 
4382 	if (ib->flags & AMDGPU_IB_FLAG_CE)
4383 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4384 	else
4385 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4386 
4387 	control |= ib->length_dw | (vmid << 24);
4388 
4389 	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
4390 		control |= INDIRECT_BUFFER_PRE_ENB(1);
4391 
4392 		if (!(ib->flags & AMDGPU_IB_FLAG_CE))
4393 			gfx_v9_0_ring_emit_de_meta(ring);
4394 	}
4395 
4396 	amdgpu_ring_write(ring, header);
4397 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4398 	amdgpu_ring_write(ring,
4399 #ifdef __BIG_ENDIAN
4400 		(2 << 0) |
4401 #endif
4402 		lower_32_bits(ib->gpu_addr));
4403 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4404 	amdgpu_ring_write(ring, control);
4405 }
4406 
4407 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
4408 					  struct amdgpu_job *job,
4409 					  struct amdgpu_ib *ib,
4410 					  uint32_t flags)
4411 {
4412 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
4413 	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
4414 
4415 	/* Currently, there is a high possibility to get wave ID mismatch
4416 	 * between ME and GDS, leading to a hw deadlock, because ME generates
4417 	 * different wave IDs than the GDS expects. This situation happens
4418 	 * randomly when at least 5 compute pipes use GDS ordered append.
4419 	 * The wave IDs generated by ME are also wrong after suspend/resume.
4420 	 * Those are probably bugs somewhere else in the kernel driver.
4421 	 *
4422 	 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
4423 	 * GDS to 0 for this ring (me/pipe).
4424 	 */
4425 	if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
4426 		amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
4427 		amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
4428 		amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
4429 	}
4430 
4431 	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
4432 	BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
4433 	amdgpu_ring_write(ring,
4434 #ifdef __BIG_ENDIAN
4435 				(2 << 0) |
4436 #endif
4437 				lower_32_bits(ib->gpu_addr));
4438 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
4439 	amdgpu_ring_write(ring, control);
4440 }
4441 
4442 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
4443 				     u64 seq, unsigned flags)
4444 {
4445 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
4446 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
4447 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
4448 
4449 	/* RELEASE_MEM - flush caches, send int */
4450 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
4451 	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
4452 					       EOP_TC_NC_ACTION_EN) :
4453 					      (EOP_TCL1_ACTION_EN |
4454 					       EOP_TC_ACTION_EN |
4455 					       EOP_TC_WB_ACTION_EN |
4456 					       EOP_TC_MD_ACTION_EN)) |
4457 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4458 				 EVENT_INDEX(5)));
4459 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
4460 
4461 	/*
4462 	 * the address should be Qword aligned if 64bit write, Dword
4463 	 * aligned if only send 32bit data low (discard data high)
4464 	 */
4465 	if (write64bit)
4466 		BUG_ON(addr & 0x7);
4467 	else
4468 		BUG_ON(addr & 0x3);
4469 	amdgpu_ring_write(ring, lower_32_bits(addr));
4470 	amdgpu_ring_write(ring, upper_32_bits(addr));
4471 	amdgpu_ring_write(ring, lower_32_bits(seq));
4472 	amdgpu_ring_write(ring, upper_32_bits(seq));
4473 	amdgpu_ring_write(ring, 0);
4474 }
4475 
4476 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
4477 {
4478 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4479 	uint32_t seq = ring->fence_drv.sync_seq;
4480 	uint64_t addr = ring->fence_drv.gpu_addr;
4481 
4482 	gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
4483 			      lower_32_bits(addr), upper_32_bits(addr),
4484 			      seq, 0xffffffff, 4);
4485 }
4486 
4487 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
4488 					unsigned vmid, uint64_t pd_addr)
4489 {
4490 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
4491 
4492 	/* compute doesn't have PFP */
4493 	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
4494 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4495 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4496 		amdgpu_ring_write(ring, 0x0);
4497 	}
4498 }
4499 
4500 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4501 {
4502 	return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
4503 }
4504 
4505 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4506 {
4507 	u64 wptr;
4508 
4509 	/* XXX check if swapping is necessary on BE */
4510 	if (ring->use_doorbell)
4511 		wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
4512 	else
4513 		BUG();
4514 	return wptr;
4515 }
4516 
4517 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
4518 					   bool acquire)
4519 {
4520 	struct amdgpu_device *adev = ring->adev;
4521 	int pipe_num, tmp, reg;
4522 	int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
4523 
4524 	pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
4525 
4526 	/* first me only has 2 entries, GFX and HP3D */
4527 	if (ring->me > 0)
4528 		pipe_num -= 2;
4529 
4530 	reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
4531 	tmp = RREG32(reg);
4532 	tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
4533 	WREG32(reg, tmp);
4534 }
4535 
4536 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
4537 					    struct amdgpu_ring *ring,
4538 					    bool acquire)
4539 {
4540 	int i, pipe;
4541 	bool reserve;
4542 	struct amdgpu_ring *iring;
4543 
4544 	mutex_lock(&adev->gfx.pipe_reserve_mutex);
4545 	pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
4546 	if (acquire)
4547 		set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4548 	else
4549 		clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4550 
4551 	if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
4552 		/* Clear all reservations - everyone reacquires all resources */
4553 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
4554 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
4555 						       true);
4556 
4557 		for (i = 0; i < adev->gfx.num_compute_rings; ++i)
4558 			gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
4559 						       true);
4560 	} else {
4561 		/* Lower all pipes without a current reservation */
4562 		for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
4563 			iring = &adev->gfx.gfx_ring[i];
4564 			pipe = amdgpu_gfx_queue_to_bit(adev,
4565 						       iring->me,
4566 						       iring->pipe,
4567 						       0);
4568 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4569 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4570 		}
4571 
4572 		for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
4573 			iring = &adev->gfx.compute_ring[i];
4574 			pipe = amdgpu_gfx_queue_to_bit(adev,
4575 						       iring->me,
4576 						       iring->pipe,
4577 						       0);
4578 			reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
4579 			gfx_v9_0_ring_set_pipe_percent(iring, reserve);
4580 		}
4581 	}
4582 
4583 	mutex_unlock(&adev->gfx.pipe_reserve_mutex);
4584 }
4585 
4586 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
4587 				      struct amdgpu_ring *ring,
4588 				      bool acquire)
4589 {
4590 	uint32_t pipe_priority = acquire ? 0x2 : 0x0;
4591 	uint32_t queue_priority = acquire ? 0xf : 0x0;
4592 
4593 	mutex_lock(&adev->srbm_mutex);
4594 	soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4595 
4596 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
4597 	WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
4598 
4599 	soc15_grbm_select(adev, 0, 0, 0, 0);
4600 	mutex_unlock(&adev->srbm_mutex);
4601 }
4602 
4603 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
4604 					       enum drm_sched_priority priority)
4605 {
4606 	struct amdgpu_device *adev = ring->adev;
4607 	bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
4608 
4609 	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
4610 		return;
4611 
4612 	gfx_v9_0_hqd_set_priority(adev, ring, acquire);
4613 	gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
4614 }
4615 
4616 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4617 {
4618 	struct amdgpu_device *adev = ring->adev;
4619 
4620 	/* XXX check if swapping is necessary on BE */
4621 	if (ring->use_doorbell) {
4622 		atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4623 		WDOORBELL64(ring->doorbell_index, ring->wptr);
4624 	} else{
4625 		BUG(); /* only DOORBELL method supported on gfx9 now */
4626 	}
4627 }
4628 
4629 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
4630 					 u64 seq, unsigned int flags)
4631 {
4632 	struct amdgpu_device *adev = ring->adev;
4633 
4634 	/* we only allocate 32bit for each seq wb address */
4635 	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
4636 
4637 	/* write fence seq to the "addr" */
4638 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4639 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4640 				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
4641 	amdgpu_ring_write(ring, lower_32_bits(addr));
4642 	amdgpu_ring_write(ring, upper_32_bits(addr));
4643 	amdgpu_ring_write(ring, lower_32_bits(seq));
4644 
4645 	if (flags & AMDGPU_FENCE_FLAG_INT) {
4646 		/* set register to trigger INT */
4647 		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4648 		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4649 					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
4650 		amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
4651 		amdgpu_ring_write(ring, 0);
4652 		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
4653 	}
4654 }
4655 
4656 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
4657 {
4658 	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4659 	amdgpu_ring_write(ring, 0);
4660 }
4661 
4662 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
4663 {
4664 	struct v9_ce_ib_state ce_payload = {0};
4665 	uint64_t csa_addr;
4666 	int cnt;
4667 
4668 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
4669 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4670 
4671 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4672 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
4673 				 WRITE_DATA_DST_SEL(8) |
4674 				 WR_CONFIRM) |
4675 				 WRITE_DATA_CACHE_POLICY(0));
4676 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4677 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
4678 	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
4679 }
4680 
4681 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
4682 {
4683 	struct v9_de_ib_state de_payload = {0};
4684 	uint64_t csa_addr, gds_addr;
4685 	int cnt;
4686 
4687 	csa_addr = amdgpu_csa_vaddr(ring->adev);
4688 	gds_addr = csa_addr + 4096;
4689 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
4690 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
4691 
4692 	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
4693 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
4694 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
4695 				 WRITE_DATA_DST_SEL(8) |
4696 				 WR_CONFIRM) |
4697 				 WRITE_DATA_CACHE_POLICY(0));
4698 	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4699 	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
4700 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
4701 }
4702 
4703 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
4704 {
4705 	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
4706 	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
4707 }
4708 
4709 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
4710 {
4711 	uint32_t dw2 = 0;
4712 
4713 	if (amdgpu_sriov_vf(ring->adev))
4714 		gfx_v9_0_ring_emit_ce_meta(ring);
4715 
4716 	gfx_v9_0_ring_emit_tmz(ring, true);
4717 
4718 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
4719 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
4720 		/* set load_global_config & load_global_uconfig */
4721 		dw2 |= 0x8001;
4722 		/* set load_cs_sh_regs */
4723 		dw2 |= 0x01000000;
4724 		/* set load_per_context_state & load_gfx_sh_regs for GFX */
4725 		dw2 |= 0x10002;
4726 
4727 		/* set load_ce_ram if preamble presented */
4728 		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
4729 			dw2 |= 0x10000000;
4730 	} else {
4731 		/* still load_ce_ram if this is the first time preamble presented
4732 		 * although there is no context switch happens.
4733 		 */
4734 		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
4735 			dw2 |= 0x10000000;
4736 	}
4737 
4738 	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4739 	amdgpu_ring_write(ring, dw2);
4740 	amdgpu_ring_write(ring, 0);
4741 }
4742 
4743 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
4744 {
4745 	unsigned ret;
4746 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
4747 	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
4748 	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
4749 	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
4750 	ret = ring->wptr & ring->buf_mask;
4751 	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
4752 	return ret;
4753 }
4754 
4755 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
4756 {
4757 	unsigned cur;
4758 	BUG_ON(offset > ring->buf_mask);
4759 	BUG_ON(ring->ring[offset] != 0x55aa55aa);
4760 
4761 	cur = (ring->wptr & ring->buf_mask) - 1;
4762 	if (likely(cur > offset))
4763 		ring->ring[offset] = cur - offset;
4764 	else
4765 		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
4766 }
4767 
4768 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
4769 {
4770 	struct amdgpu_device *adev = ring->adev;
4771 
4772 	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4773 	amdgpu_ring_write(ring, 0 |	/* src: register*/
4774 				(5 << 8) |	/* dst: memory */
4775 				(1 << 20));	/* write confirm */
4776 	amdgpu_ring_write(ring, reg);
4777 	amdgpu_ring_write(ring, 0);
4778 	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4779 				adev->virt.reg_val_offs * 4));
4780 	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4781 				adev->virt.reg_val_offs * 4));
4782 }
4783 
4784 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
4785 				    uint32_t val)
4786 {
4787 	uint32_t cmd = 0;
4788 
4789 	switch (ring->funcs->type) {
4790 	case AMDGPU_RING_TYPE_GFX:
4791 		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
4792 		break;
4793 	case AMDGPU_RING_TYPE_KIQ:
4794 		cmd = (1 << 16); /* no inc addr */
4795 		break;
4796 	default:
4797 		cmd = WR_CONFIRM;
4798 		break;
4799 	}
4800 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4801 	amdgpu_ring_write(ring, cmd);
4802 	amdgpu_ring_write(ring, reg);
4803 	amdgpu_ring_write(ring, 0);
4804 	amdgpu_ring_write(ring, val);
4805 }
4806 
4807 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
4808 					uint32_t val, uint32_t mask)
4809 {
4810 	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
4811 }
4812 
4813 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
4814 						  uint32_t reg0, uint32_t reg1,
4815 						  uint32_t ref, uint32_t mask)
4816 {
4817 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
4818 	struct amdgpu_device *adev = ring->adev;
4819 	bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
4820 		adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
4821 
4822 	if (fw_version_ok)
4823 		gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
4824 				      ref, mask, 0x20);
4825 	else
4826 		amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
4827 							   ref, mask);
4828 }
4829 
4830 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
4831 {
4832 	struct amdgpu_device *adev = ring->adev;
4833 	uint32_t value = 0;
4834 
4835 	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
4836 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
4837 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
4838 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
4839 	WREG32(mmSQ_CMD, value);
4840 }
4841 
4842 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4843 						 enum amdgpu_interrupt_state state)
4844 {
4845 	switch (state) {
4846 	case AMDGPU_IRQ_STATE_DISABLE:
4847 	case AMDGPU_IRQ_STATE_ENABLE:
4848 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4849 			       TIME_STAMP_INT_ENABLE,
4850 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4851 		break;
4852 	default:
4853 		break;
4854 	}
4855 }
4856 
4857 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
4858 						     int me, int pipe,
4859 						     enum amdgpu_interrupt_state state)
4860 {
4861 	u32 mec_int_cntl, mec_int_cntl_reg;
4862 
4863 	/*
4864 	 * amdgpu controls only the first MEC. That's why this function only
4865 	 * handles the setting of interrupts for this specific MEC. All other
4866 	 * pipes' interrupts are set by amdkfd.
4867 	 */
4868 
4869 	if (me == 1) {
4870 		switch (pipe) {
4871 		case 0:
4872 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
4873 			break;
4874 		case 1:
4875 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
4876 			break;
4877 		case 2:
4878 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
4879 			break;
4880 		case 3:
4881 			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
4882 			break;
4883 		default:
4884 			DRM_DEBUG("invalid pipe %d\n", pipe);
4885 			return;
4886 		}
4887 	} else {
4888 		DRM_DEBUG("invalid me %d\n", me);
4889 		return;
4890 	}
4891 
4892 	switch (state) {
4893 	case AMDGPU_IRQ_STATE_DISABLE:
4894 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4895 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4896 					     TIME_STAMP_INT_ENABLE, 0);
4897 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4898 		break;
4899 	case AMDGPU_IRQ_STATE_ENABLE:
4900 		mec_int_cntl = RREG32(mec_int_cntl_reg);
4901 		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
4902 					     TIME_STAMP_INT_ENABLE, 1);
4903 		WREG32(mec_int_cntl_reg, mec_int_cntl);
4904 		break;
4905 	default:
4906 		break;
4907 	}
4908 }
4909 
4910 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4911 					     struct amdgpu_irq_src *source,
4912 					     unsigned type,
4913 					     enum amdgpu_interrupt_state state)
4914 {
4915 	switch (state) {
4916 	case AMDGPU_IRQ_STATE_DISABLE:
4917 	case AMDGPU_IRQ_STATE_ENABLE:
4918 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4919 			       PRIV_REG_INT_ENABLE,
4920 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4921 		break;
4922 	default:
4923 		break;
4924 	}
4925 
4926 	return 0;
4927 }
4928 
4929 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
4930 					      struct amdgpu_irq_src *source,
4931 					      unsigned type,
4932 					      enum amdgpu_interrupt_state state)
4933 {
4934 	switch (state) {
4935 	case AMDGPU_IRQ_STATE_DISABLE:
4936 	case AMDGPU_IRQ_STATE_ENABLE:
4937 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4938 			       PRIV_INSTR_INT_ENABLE,
4939 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
4940 	default:
4941 		break;
4942 	}
4943 
4944 	return 0;
4945 }
4946 
4947 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)				\
4948 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4949 			CP_ECC_ERROR_INT_ENABLE, 1)
4950 
4951 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)			\
4952 	WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
4953 			CP_ECC_ERROR_INT_ENABLE, 0)
4954 
4955 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
4956 					      struct amdgpu_irq_src *source,
4957 					      unsigned type,
4958 					      enum amdgpu_interrupt_state state)
4959 {
4960 	switch (state) {
4961 	case AMDGPU_IRQ_STATE_DISABLE:
4962 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4963 				CP_ECC_ERROR_INT_ENABLE, 0);
4964 		DISABLE_ECC_ON_ME_PIPE(1, 0);
4965 		DISABLE_ECC_ON_ME_PIPE(1, 1);
4966 		DISABLE_ECC_ON_ME_PIPE(1, 2);
4967 		DISABLE_ECC_ON_ME_PIPE(1, 3);
4968 		break;
4969 
4970 	case AMDGPU_IRQ_STATE_ENABLE:
4971 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
4972 				CP_ECC_ERROR_INT_ENABLE, 1);
4973 		ENABLE_ECC_ON_ME_PIPE(1, 0);
4974 		ENABLE_ECC_ON_ME_PIPE(1, 1);
4975 		ENABLE_ECC_ON_ME_PIPE(1, 2);
4976 		ENABLE_ECC_ON_ME_PIPE(1, 3);
4977 		break;
4978 	default:
4979 		break;
4980 	}
4981 
4982 	return 0;
4983 }
4984 
4985 
4986 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4987 					    struct amdgpu_irq_src *src,
4988 					    unsigned type,
4989 					    enum amdgpu_interrupt_state state)
4990 {
4991 	switch (type) {
4992 	case AMDGPU_CP_IRQ_GFX_EOP:
4993 		gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
4994 		break;
4995 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4996 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4997 		break;
4998 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4999 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5000 		break;
5001 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5002 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5003 		break;
5004 	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5005 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5006 		break;
5007 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5008 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5009 		break;
5010 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5011 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5012 		break;
5013 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5014 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5015 		break;
5016 	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5017 		gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5018 		break;
5019 	default:
5020 		break;
5021 	}
5022 	return 0;
5023 }
5024 
5025 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5026 			    struct amdgpu_irq_src *source,
5027 			    struct amdgpu_iv_entry *entry)
5028 {
5029 	int i;
5030 	u8 me_id, pipe_id, queue_id;
5031 	struct amdgpu_ring *ring;
5032 
5033 	DRM_DEBUG("IH: CP EOP\n");
5034 	me_id = (entry->ring_id & 0x0c) >> 2;
5035 	pipe_id = (entry->ring_id & 0x03) >> 0;
5036 	queue_id = (entry->ring_id & 0x70) >> 4;
5037 
5038 	switch (me_id) {
5039 	case 0:
5040 		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5041 		break;
5042 	case 1:
5043 	case 2:
5044 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5045 			ring = &adev->gfx.compute_ring[i];
5046 			/* Per-queue interrupt is supported for MEC starting from VI.
5047 			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
5048 			  */
5049 			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5050 				amdgpu_fence_process(ring);
5051 		}
5052 		break;
5053 	}
5054 	return 0;
5055 }
5056 
5057 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5058 			   struct amdgpu_iv_entry *entry)
5059 {
5060 	u8 me_id, pipe_id, queue_id;
5061 	struct amdgpu_ring *ring;
5062 	int i;
5063 
5064 	me_id = (entry->ring_id & 0x0c) >> 2;
5065 	pipe_id = (entry->ring_id & 0x03) >> 0;
5066 	queue_id = (entry->ring_id & 0x70) >> 4;
5067 
5068 	switch (me_id) {
5069 	case 0:
5070 		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5071 		break;
5072 	case 1:
5073 	case 2:
5074 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5075 			ring = &adev->gfx.compute_ring[i];
5076 			if (ring->me == me_id && ring->pipe == pipe_id &&
5077 			    ring->queue == queue_id)
5078 				drm_sched_fault(&ring->sched);
5079 		}
5080 		break;
5081 	}
5082 }
5083 
5084 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5085 				 struct amdgpu_irq_src *source,
5086 				 struct amdgpu_iv_entry *entry)
5087 {
5088 	DRM_ERROR("Illegal register access in command stream\n");
5089 	gfx_v9_0_fault(adev, entry);
5090 	return 0;
5091 }
5092 
5093 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5094 				  struct amdgpu_irq_src *source,
5095 				  struct amdgpu_iv_entry *entry)
5096 {
5097 	DRM_ERROR("Illegal instruction in command stream\n");
5098 	gfx_v9_0_fault(adev, entry);
5099 	return 0;
5100 }
5101 
5102 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5103 		struct amdgpu_iv_entry *entry)
5104 {
5105 	/* TODO ue will trigger an interrupt. */
5106 	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5107 	amdgpu_ras_reset_gpu(adev, 0);
5108 	return AMDGPU_RAS_UE;
5109 }
5110 
5111 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
5112 				  struct amdgpu_irq_src *source,
5113 				  struct amdgpu_iv_entry *entry)
5114 {
5115 	struct ras_common_if *ras_if = adev->gfx.ras_if;
5116 	struct ras_dispatch_if ih_data = {
5117 		.entry = entry,
5118 	};
5119 
5120 	if (!ras_if)
5121 		return 0;
5122 
5123 	ih_data.head = *ras_if;
5124 
5125 	DRM_ERROR("CP ECC ERROR IRQ\n");
5126 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
5127 	return 0;
5128 }
5129 
5130 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
5131 	.name = "gfx_v9_0",
5132 	.early_init = gfx_v9_0_early_init,
5133 	.late_init = gfx_v9_0_late_init,
5134 	.sw_init = gfx_v9_0_sw_init,
5135 	.sw_fini = gfx_v9_0_sw_fini,
5136 	.hw_init = gfx_v9_0_hw_init,
5137 	.hw_fini = gfx_v9_0_hw_fini,
5138 	.suspend = gfx_v9_0_suspend,
5139 	.resume = gfx_v9_0_resume,
5140 	.is_idle = gfx_v9_0_is_idle,
5141 	.wait_for_idle = gfx_v9_0_wait_for_idle,
5142 	.soft_reset = gfx_v9_0_soft_reset,
5143 	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
5144 	.set_powergating_state = gfx_v9_0_set_powergating_state,
5145 	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
5146 };
5147 
5148 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
5149 	.type = AMDGPU_RING_TYPE_GFX,
5150 	.align_mask = 0xff,
5151 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5152 	.support_64bit_ptrs = true,
5153 	.vmhub = AMDGPU_GFXHUB,
5154 	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
5155 	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
5156 	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
5157 	.emit_frame_size = /* totally 242 maximum if 16 IBs */
5158 		5 +  /* COND_EXEC */
5159 		7 +  /* PIPELINE_SYNC */
5160 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5161 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5162 		2 + /* VM_FLUSH */
5163 		8 +  /* FENCE for VM_FLUSH */
5164 		20 + /* GDS switch */
5165 		4 + /* double SWITCH_BUFFER,
5166 		       the first COND_EXEC jump to the place just
5167 			   prior to this double SWITCH_BUFFER  */
5168 		5 + /* COND_EXEC */
5169 		7 +	 /*	HDP_flush */
5170 		4 +	 /*	VGT_flush */
5171 		14 + /*	CE_META */
5172 		31 + /*	DE_META */
5173 		3 + /* CNTX_CTRL */
5174 		5 + /* HDP_INVL */
5175 		8 + 8 + /* FENCE x2 */
5176 		2, /* SWITCH_BUFFER */
5177 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
5178 	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
5179 	.emit_fence = gfx_v9_0_ring_emit_fence,
5180 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5181 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5182 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5183 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5184 	.test_ring = gfx_v9_0_ring_test_ring,
5185 	.test_ib = gfx_v9_0_ring_test_ib,
5186 	.insert_nop = amdgpu_ring_insert_nop,
5187 	.pad_ib = amdgpu_ring_generic_pad_ib,
5188 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
5189 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
5190 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
5191 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
5192 	.emit_tmz = gfx_v9_0_ring_emit_tmz,
5193 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5194 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5195 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5196 	.soft_recovery = gfx_v9_0_ring_soft_recovery,
5197 };
5198 
5199 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
5200 	.type = AMDGPU_RING_TYPE_COMPUTE,
5201 	.align_mask = 0xff,
5202 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5203 	.support_64bit_ptrs = true,
5204 	.vmhub = AMDGPU_GFXHUB,
5205 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5206 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5207 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5208 	.emit_frame_size =
5209 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5210 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5211 		5 + /* hdp invalidate */
5212 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5213 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5214 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5215 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5216 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
5217 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5218 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
5219 	.emit_fence = gfx_v9_0_ring_emit_fence,
5220 	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
5221 	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
5222 	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
5223 	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
5224 	.test_ring = gfx_v9_0_ring_test_ring,
5225 	.test_ib = gfx_v9_0_ring_test_ib,
5226 	.insert_nop = amdgpu_ring_insert_nop,
5227 	.pad_ib = amdgpu_ring_generic_pad_ib,
5228 	.set_priority = gfx_v9_0_ring_set_priority_compute,
5229 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5230 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5231 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5232 };
5233 
5234 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
5235 	.type = AMDGPU_RING_TYPE_KIQ,
5236 	.align_mask = 0xff,
5237 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
5238 	.support_64bit_ptrs = true,
5239 	.vmhub = AMDGPU_GFXHUB,
5240 	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
5241 	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
5242 	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
5243 	.emit_frame_size =
5244 		20 + /* gfx_v9_0_ring_emit_gds_switch */
5245 		7 + /* gfx_v9_0_ring_emit_hdp_flush */
5246 		5 + /* hdp invalidate */
5247 		7 + /* gfx_v9_0_ring_emit_pipeline_sync */
5248 		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
5249 		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
5250 		2 + /* gfx_v9_0_ring_emit_vm_flush */
5251 		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
5252 	.emit_ib_size =	7, /* gfx_v9_0_ring_emit_ib_compute */
5253 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
5254 	.test_ring = gfx_v9_0_ring_test_ring,
5255 	.insert_nop = amdgpu_ring_insert_nop,
5256 	.pad_ib = amdgpu_ring_generic_pad_ib,
5257 	.emit_rreg = gfx_v9_0_ring_emit_rreg,
5258 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
5259 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
5260 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
5261 };
5262 
5263 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
5264 {
5265 	int i;
5266 
5267 	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
5268 
5269 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5270 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
5271 
5272 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
5273 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
5274 }
5275 
5276 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
5277 	.set = gfx_v9_0_set_eop_interrupt_state,
5278 	.process = gfx_v9_0_eop_irq,
5279 };
5280 
5281 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
5282 	.set = gfx_v9_0_set_priv_reg_fault_state,
5283 	.process = gfx_v9_0_priv_reg_irq,
5284 };
5285 
5286 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
5287 	.set = gfx_v9_0_set_priv_inst_fault_state,
5288 	.process = gfx_v9_0_priv_inst_irq,
5289 };
5290 
5291 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
5292 	.set = gfx_v9_0_set_cp_ecc_error_state,
5293 	.process = gfx_v9_0_cp_ecc_error_irq,
5294 };
5295 
5296 
5297 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
5298 {
5299 	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5300 	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
5301 
5302 	adev->gfx.priv_reg_irq.num_types = 1;
5303 	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
5304 
5305 	adev->gfx.priv_inst_irq.num_types = 1;
5306 	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
5307 
5308 	adev->gfx.cp_ecc_error_irq.num_types = 2; /*C5 ECC error and C9 FUE error*/
5309 	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
5310 }
5311 
5312 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
5313 {
5314 	switch (adev->asic_type) {
5315 	case CHIP_VEGA10:
5316 	case CHIP_VEGA12:
5317 	case CHIP_VEGA20:
5318 	case CHIP_RAVEN:
5319 		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
5320 		break;
5321 	default:
5322 		break;
5323 	}
5324 }
5325 
5326 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
5327 {
5328 	/* init asci gds info */
5329 	switch (adev->asic_type) {
5330 	case CHIP_VEGA10:
5331 	case CHIP_VEGA12:
5332 	case CHIP_VEGA20:
5333 		adev->gds.gds_size = 0x10000;
5334 		break;
5335 	case CHIP_RAVEN:
5336 		adev->gds.gds_size = 0x1000;
5337 		break;
5338 	default:
5339 		adev->gds.gds_size = 0x10000;
5340 		break;
5341 	}
5342 
5343 	switch (adev->asic_type) {
5344 	case CHIP_VEGA10:
5345 	case CHIP_VEGA20:
5346 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5347 		break;
5348 	case CHIP_VEGA12:
5349 		adev->gds.gds_compute_max_wave_id = 0x27f;
5350 		break;
5351 	case CHIP_RAVEN:
5352 		if (adev->rev_id >= 0x8)
5353 			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
5354 		else
5355 			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
5356 		break;
5357 	default:
5358 		/* this really depends on the chip */
5359 		adev->gds.gds_compute_max_wave_id = 0x7ff;
5360 		break;
5361 	}
5362 
5363 	adev->gds.gws_size = 64;
5364 	adev->gds.oa_size = 16;
5365 }
5366 
5367 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
5368 						 u32 bitmap)
5369 {
5370 	u32 data;
5371 
5372 	if (!bitmap)
5373 		return;
5374 
5375 	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5376 	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5377 
5378 	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
5379 }
5380 
5381 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5382 {
5383 	u32 data, mask;
5384 
5385 	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
5386 	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
5387 
5388 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5389 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5390 
5391 	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
5392 
5393 	return (~data) & mask;
5394 }
5395 
5396 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
5397 				 struct amdgpu_cu_info *cu_info)
5398 {
5399 	int i, j, k, counter, active_cu_number = 0;
5400 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5401 	unsigned disable_masks[4 * 2];
5402 
5403 	if (!adev || !cu_info)
5404 		return -EINVAL;
5405 
5406 	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
5407 
5408 	mutex_lock(&adev->grbm_idx_mutex);
5409 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5410 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5411 			mask = 1;
5412 			ao_bitmap = 0;
5413 			counter = 0;
5414 			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
5415 			if (i < 4 && j < 2)
5416 				gfx_v9_0_set_user_cu_inactive_bitmap(
5417 					adev, disable_masks[i * 2 + j]);
5418 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
5419 			cu_info->bitmap[i][j] = bitmap;
5420 
5421 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
5422 				if (bitmap & mask) {
5423 					if (counter < adev->gfx.config.max_cu_per_sh)
5424 						ao_bitmap |= mask;
5425 					counter ++;
5426 				}
5427 				mask <<= 1;
5428 			}
5429 			active_cu_number += counter;
5430 			if (i < 2 && j < 2)
5431 				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5432 			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
5433 		}
5434 	}
5435 	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5436 	mutex_unlock(&adev->grbm_idx_mutex);
5437 
5438 	cu_info->number = active_cu_number;
5439 	cu_info->ao_cu_mask = ao_cu_mask;
5440 	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
5441 
5442 	return 0;
5443 }
5444 
5445 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
5446 {
5447 	.type = AMD_IP_BLOCK_TYPE_GFX,
5448 	.major = 9,
5449 	.minor = 0,
5450 	.rev = 0,
5451 	.funcs = &gfx_v9_0_ip_funcs,
5452 };
5453