/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"
#include "vega10_enum.h"
#include "hdp/hdp_4_0_offset.h"

#include "soc15.h"
#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmPWR_MISC_CNTL_STATUS					0x0183
#define mmPWR_MISC_CNTL_STATUS_BASE_IDX				0
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT	0x0
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT		0x1
#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK		0x00000001L
#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK		0x00000006L

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin"); 94 MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); 95 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); 96 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); 97 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); 98 99 MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); 100 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); 101 MODULE_FIRMWARE("amdgpu/raven2_me.bin"); 102 MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); 103 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); 104 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); 105 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); 106 107 static const struct soc15_reg_golden golden_settings_gc_9_0[] = 108 { 109 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), 110 SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), 111 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), 112 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), 113 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), 114 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), 115 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000), 116 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800), 117 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800), 118 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87), 119 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f), 120 SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000), 121 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), 122 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68), 123 SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197), 124 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), 125 SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff), 126 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), 127 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), 128 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) 129 }; 130 131 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = 132 { 133 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107), 134 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000), 135 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080), 136 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080), 137 SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080), 138 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), 139 SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042), 140 SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080), 141 SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000), 142 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080), 143 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080), 144 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080), 145 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080), 146 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080), 147 SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), 148 
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

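/*
 * Vega 12 specific golden overrides. gfx_v9_0_init_golden_registers() applies
 * the generation-wide golden_settings_gc_9_2_1 table first and then this
 * per-ASIC table; each entry supplies the register, the mask of bits to
 * update and the value to program into those bits.
 */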
static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
	mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
	mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
	mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);

static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if (!amdgpu_virt_support_skip_setting(adev)) {
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0,
							ARRAY_SIZE(golden_settings_gc_9_0));
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_0_vg10,
							ARRAY_SIZE(golden_settings_gc_9_0_vg10));
		}
		break;
	case CHIP_VEGA12:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1,
						ARRAY_SIZE(golden_settings_gc_9_2_1));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_2_1_vg12,
						ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
		break;
	case CHIP_VEGA20:
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0,
						ARRAY_SIZE(golden_settings_gc_9_0));
		soc15_program_register_sequence(adev,
						golden_settings_gc_9_0_vg20,
						ARRAY_SIZE(golden_settings_gc_9_0_vg20));
		break;
	case CHIP_RAVEN:
		soc15_program_register_sequence(adev, golden_settings_gc_9_1,
						ARRAY_SIZE(golden_settings_gc_9_1));
		if (adev->rev_id >= 8)
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv2,
							ARRAY_SIZE(golden_settings_gc_9_1_rv2));
		else
			soc15_program_register_sequence(adev,
							golden_settings_gc_9_1_rv1,
							ARRAY_SIZE(golden_settings_gc_9_1_rv1));
		break;
	default:
		break;
	}

	soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
					(const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
				       bool wc, uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
				WRITE_DATA_DST_SEL(0) |
				(wc ? WR_CONFIRM : 0));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
				  int mem_space, int opt, uint32_t addr0,
				  uint32_t addr1, uint32_t ref, uint32_t mask,
				  uint32_t inv)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring,
			  /* memory (1) or register (0) */
			  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
			   WAIT_REG_MEM_OPERATION(opt) | /* wait */
			   WAIT_REG_MEM_FUNCTION(3) |  /* equal */
			   WAIT_REG_MEM_ENGINE(eng_sel)));

	if (mem_space)
		BUG_ON(addr0 & 0x3); /* Dword align */
	amdgpu_ring_write(ring, addr0);
	amdgpu_ring_write(ring, addr1);
	amdgpu_ring_write(ring, ref);
	amdgpu_ring_write(ring, mask);
	amdgpu_ring_write(ring, inv); /* poll interval */
}

static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r)
		return r;

	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		goto error_free_scratch;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_scratch:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}


static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_1 *rlc_hdr;

	rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
	adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
	adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
	adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
	adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
	adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
	adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
	adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
	adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
	adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
	adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
	adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
	adev->gfx.rlc.reg_list_format_direct_reg_list_length =
			le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
}

static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
{
	adev->gfx.me_fw_write_wait = false;
	adev->gfx.mec_fw_write_wait = false;

	switch (adev->asic_type) {
	case CHIP_VEGA10:
		if ((adev->gfx.me_fw_version >= 0x0000009c) &&
(adev->gfx.me_feature_version >= 42) && 544 (adev->gfx.pfp_fw_version >= 0x000000b1) && 545 (adev->gfx.pfp_feature_version >= 42)) 546 adev->gfx.me_fw_write_wait = true; 547 548 if ((adev->gfx.mec_fw_version >= 0x00000193) && 549 (adev->gfx.mec_feature_version >= 42)) 550 adev->gfx.mec_fw_write_wait = true; 551 break; 552 case CHIP_VEGA12: 553 if ((adev->gfx.me_fw_version >= 0x0000009c) && 554 (adev->gfx.me_feature_version >= 44) && 555 (adev->gfx.pfp_fw_version >= 0x000000b2) && 556 (adev->gfx.pfp_feature_version >= 44)) 557 adev->gfx.me_fw_write_wait = true; 558 559 if ((adev->gfx.mec_fw_version >= 0x00000196) && 560 (adev->gfx.mec_feature_version >= 44)) 561 adev->gfx.mec_fw_write_wait = true; 562 break; 563 case CHIP_VEGA20: 564 if ((adev->gfx.me_fw_version >= 0x0000009c) && 565 (adev->gfx.me_feature_version >= 44) && 566 (adev->gfx.pfp_fw_version >= 0x000000b2) && 567 (adev->gfx.pfp_feature_version >= 44)) 568 adev->gfx.me_fw_write_wait = true; 569 570 if ((adev->gfx.mec_fw_version >= 0x00000197) && 571 (adev->gfx.mec_feature_version >= 44)) 572 adev->gfx.mec_fw_write_wait = true; 573 break; 574 case CHIP_RAVEN: 575 if ((adev->gfx.me_fw_version >= 0x0000009c) && 576 (adev->gfx.me_feature_version >= 42) && 577 (adev->gfx.pfp_fw_version >= 0x000000b1) && 578 (adev->gfx.pfp_feature_version >= 42)) 579 adev->gfx.me_fw_write_wait = true; 580 581 if ((adev->gfx.mec_fw_version >= 0x00000192) && 582 (adev->gfx.mec_feature_version >= 42)) 583 adev->gfx.mec_fw_write_wait = true; 584 break; 585 default: 586 break; 587 } 588 } 589 590 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) 591 { 592 switch (adev->asic_type) { 593 case CHIP_VEGA10: 594 case CHIP_VEGA12: 595 case CHIP_VEGA20: 596 break; 597 case CHIP_RAVEN: 598 if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) 599 break; 600 if ((adev->gfx.rlc_fw_version != 106 && 601 adev->gfx.rlc_fw_version < 531) || 602 (adev->gfx.rlc_fw_version == 53815) || 603 (adev->gfx.rlc_feature_version < 1) || 604 !adev->gfx.rlc.is_rlc_v2_1) 605 adev->pm.pp_feature &= ~PP_GFXOFF_MASK; 606 break; 607 default: 608 break; 609 } 610 } 611 612 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) 613 { 614 const char *chip_name; 615 char fw_name[30]; 616 int err; 617 struct amdgpu_firmware_info *info = NULL; 618 const struct common_firmware_header *header = NULL; 619 const struct gfx_firmware_header_v1_0 *cp_hdr; 620 const struct rlc_firmware_header_v2_0 *rlc_hdr; 621 unsigned int *tmp = NULL; 622 unsigned int i = 0; 623 uint16_t version_major; 624 uint16_t version_minor; 625 uint32_t smu_version; 626 627 DRM_DEBUG("\n"); 628 629 switch (adev->asic_type) { 630 case CHIP_VEGA10: 631 chip_name = "vega10"; 632 break; 633 case CHIP_VEGA12: 634 chip_name = "vega12"; 635 break; 636 case CHIP_VEGA20: 637 chip_name = "vega20"; 638 break; 639 case CHIP_RAVEN: 640 if (adev->rev_id >= 8) 641 chip_name = "raven2"; 642 else if (adev->pdev->device == 0x15d8) 643 chip_name = "picasso"; 644 else 645 chip_name = "raven"; 646 break; 647 default: 648 BUG(); 649 } 650 651 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 652 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 653 if (err) 654 goto out; 655 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 656 if (err) 657 goto out; 658 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 659 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 660 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 661 662 
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 663 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 664 if (err) 665 goto out; 666 err = amdgpu_ucode_validate(adev->gfx.me_fw); 667 if (err) 668 goto out; 669 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 670 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 671 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 672 673 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 674 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 675 if (err) 676 goto out; 677 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 678 if (err) 679 goto out; 680 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 681 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 682 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 683 684 /* 685 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin 686 * instead of picasso_rlc.bin. 687 * Judgment method: 688 * PCO AM4: revision >= 0xC8 && revision <= 0xCF 689 * or revision >= 0xD8 && revision <= 0xDF 690 * otherwise is PCO FP5 691 */ 692 if (!strcmp(chip_name, "picasso") && 693 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || 694 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) 695 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); 696 else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && 697 (smu_version >= 0x41e2b)) 698 /** 699 *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 700 */ 701 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name); 702 else 703 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 704 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 705 if (err) 706 goto out; 707 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 708 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 709 710 version_major = le16_to_cpu(rlc_hdr->header.header_version_major); 711 version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); 712 if (version_major == 2 && version_minor == 1) 713 adev->gfx.rlc.is_rlc_v2_1 = true; 714 715 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 716 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 717 adev->gfx.rlc.save_and_restore_offset = 718 le32_to_cpu(rlc_hdr->save_and_restore_offset); 719 adev->gfx.rlc.clear_state_descriptor_offset = 720 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 721 adev->gfx.rlc.avail_scratch_ram_locations = 722 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 723 adev->gfx.rlc.reg_restore_list_size = 724 le32_to_cpu(rlc_hdr->reg_restore_list_size); 725 adev->gfx.rlc.reg_list_format_start = 726 le32_to_cpu(rlc_hdr->reg_list_format_start); 727 adev->gfx.rlc.reg_list_format_separate_start = 728 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 729 adev->gfx.rlc.starting_offsets_start = 730 le32_to_cpu(rlc_hdr->starting_offsets_start); 731 adev->gfx.rlc.reg_list_format_size_bytes = 732 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 733 adev->gfx.rlc.reg_list_size_bytes = 734 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 735 adev->gfx.rlc.register_list_format = 736 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 737 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 738 if (!adev->gfx.rlc.register_list_format) { 739 
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] =	le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->gfx.rlc.is_rlc_v2_1)
		gfx_v9_0_init_rlc_ext_microcode(adev);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);


	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
	err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
	if (!err) {
		err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
		if (err)
			goto out;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.mec2_fw->data;
		adev->gfx.mec2_fw_version =
		le32_to_cpu(cp_hdr->header.ucode_version);
		adev->gfx.mec2_feature_version =
		le32_to_cpu(cp_hdr->ucode_feature_version);
	} else {
		err = 0;
		adev->gfx.mec2_fw = NULL;
	}

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.rlc.is_rlc_v2_1 &&
		    adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
		    adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
		    adev->gfx.rlc.save_restore_list_srm_size_bytes) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);

			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
			info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
			info->fw = adev->gfx.rlc_fw;
			adev->firmware.fw_size +=
				ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
		}

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
			info->fw = adev->gfx.mec2_fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
		}

	}

out:
	gfx_v9_0_check_if_need_gfxoff(adev);
	gfx_v9_0_check_fw_write_wait(adev);
	if (err) {
		dev_err(adev->dev,
			"gfx9: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}

	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
{
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	uint32_t pg_always_on_cu_num = 2;
	uint32_t always_on_cu_num;
	uint32_t i, j, k;
	uint32_t mask, cu_bitmap, counter;

	if (adev->flags & AMD_IS_APU)
		always_on_cu_num = 4;
	else if (adev->asic_type == CHIP_VEGA12)
		always_on_cu_num = 8;
	else
		always_on_cu_num = 12;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (cu_info->bitmap[i][j] & mask) {
					if (counter == pg_always_on_cu_num)
						WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
					if (counter < always_on_cu_num)
						cu_bitmap |= mask;
					else
						break;
					counter++;
				}
				mask <<= 1;
			}

			WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
{
	uint32_t data;

	/* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
	WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));

	/* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);

	/* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);

	mutex_lock(&adev->grbm_idx_mutex);
	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);

	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
	data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
	data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
	WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);

	/* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
	data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
	data &= 0x0000FFFF;
	data |= 0x00C00000;
	WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);

	/*
	 * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
	 * programmed in gfx_v9_0_init_always_on_cu_mask()
	 */

	/* set RLC_LB_CNTL = 0x8000_0095, bit 31 is reserved,
	 * but used for RLC_LB_CNTL configuration */
	data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
	data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
	WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
	mutex_unlock(&adev->grbm_idx_mutex);

	gfx_v9_0_init_always_on_cu_mask(adev);
}

static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
}

static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	return 5;
}

static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = gfx9_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if (adev->asic_type == CHIP_RAVEN) {
		/* TODO: double check the cp_table_size for RV */
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		gfx_v9_0_init_lbpw(adev);
		break;
	case CHIP_VEGA20:
		gfx_v9_4_init_lbpw(adev);
		break;
	default:
		break;
	}

	return 0;
}

static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
{
	int r;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
	if (unlikely(r != 0))
		return r;

	r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
			AMDGPU_GEM_DOMAIN_VRAM);
	if (!r)
		adev->gfx.rlc.clear_state_gpu_addr =
			amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);

	amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

	return r;
}

static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
{
	int r;

	if (!adev->gfx.rlc.clear_state_obj)
		return;

	r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
	if (likely(r == 0)) {
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}
}

static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
}

static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	const __le32 *fw_data;
	unsigned fw_size;
	u32 *fw;
	size_t mec_hpd_size;

	const struct gfx_firmware_header_v1_0 *mec_hdr;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);
	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	r = amdgpu_bo_create_reserved(adev, mec_hdr->header.ucode_size_bytes,
				      PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.mec_fw_obj,
				      &adev->gfx.mec.mec_fw_gpu_addr,
				      (void **)&fw);
	if (r) {
		dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
		gfx_v9_0_mec_fini(adev);
		return r;
	}

	memcpy(fw, fw_data, fw_size);

	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);

	return 0;
}

static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
}

static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 1 wave data */
	dst[(*no_fields)++] = 1;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}

static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t start,
				     uint32_t size, uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, 0,
		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
				     uint32_t wave, uint32_t thread,
				     uint32_t start, uint32_t size,
				     uint32_t *dst)
{
	wave_read_regs(
		adev, simd, wave, thread,
		start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
}

static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
				      u32 me, u32 pipe, u32 q)
{
	soc15_grbm_select(adev, me, pipe, q, 0);
}

static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v9_0_select_se_sh,
	.read_wave_data = &gfx_v9_0_read_wave_data,
&gfx_v9_0_read_wave_sgprs, 1325 .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, 1326 .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q 1327 }; 1328 1329 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) 1330 { 1331 u32 gb_addr_config; 1332 int err; 1333 1334 adev->gfx.funcs = &gfx_v9_0_gfx_funcs; 1335 1336 switch (adev->asic_type) { 1337 case CHIP_VEGA10: 1338 adev->gfx.config.max_hw_contexts = 8; 1339 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1340 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1341 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1342 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1343 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; 1344 break; 1345 case CHIP_VEGA12: 1346 adev->gfx.config.max_hw_contexts = 8; 1347 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1348 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1349 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1350 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1351 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN; 1352 DRM_INFO("fix gfx.config for vega12\n"); 1353 break; 1354 case CHIP_VEGA20: 1355 adev->gfx.config.max_hw_contexts = 8; 1356 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1357 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1358 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1359 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1360 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); 1361 gb_addr_config &= ~0xf3e777ff; 1362 gb_addr_config |= 0x22014042; 1363 /* check vbios table if gpu info is not available */ 1364 err = amdgpu_atomfirmware_get_gfx_info(adev); 1365 if (err) 1366 return err; 1367 break; 1368 case CHIP_RAVEN: 1369 adev->gfx.config.max_hw_contexts = 8; 1370 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1371 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1372 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1373 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; 1374 if (adev->rev_id >= 8) 1375 gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; 1376 else 1377 gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 1378 break; 1379 default: 1380 BUG(); 1381 break; 1382 } 1383 1384 adev->gfx.config.gb_addr_config = gb_addr_config; 1385 1386 adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << 1387 REG_GET_FIELD( 1388 adev->gfx.config.gb_addr_config, 1389 GB_ADDR_CONFIG, 1390 NUM_PIPES); 1391 1392 adev->gfx.config.max_tile_pipes = 1393 adev->gfx.config.gb_addr_config_fields.num_pipes; 1394 1395 adev->gfx.config.gb_addr_config_fields.num_banks = 1 << 1396 REG_GET_FIELD( 1397 adev->gfx.config.gb_addr_config, 1398 GB_ADDR_CONFIG, 1399 NUM_BANKS); 1400 adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 << 1401 REG_GET_FIELD( 1402 adev->gfx.config.gb_addr_config, 1403 GB_ADDR_CONFIG, 1404 MAX_COMPRESSED_FRAGS); 1405 adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 << 1406 REG_GET_FIELD( 1407 adev->gfx.config.gb_addr_config, 1408 GB_ADDR_CONFIG, 1409 NUM_RB_PER_SE); 1410 adev->gfx.config.gb_addr_config_fields.num_se = 1 << 1411 REG_GET_FIELD( 1412 adev->gfx.config.gb_addr_config, 1413 GB_ADDR_CONFIG, 1414 NUM_SHADER_ENGINES); 1415 adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 + 1416 REG_GET_FIELD( 1417 adev->gfx.config.gb_addr_config, 1418 GB_ADDR_CONFIG, 1419 PIPE_INTERLEAVE_SIZE)); 1420 1421 return 0; 1422 } 1423 1424 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev, 1425 struct amdgpu_ngg_buf *ngg_buf, 1426 int size_se, 1427 int default_size_se) 1428 { 1429 int r; 1430 1431 if (size_se < 0) { 
		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
		return -EINVAL;
	}
	size_se = size_se ? size_se : default_size_se;

	ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
	r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
				    &ngg_buf->bo,
				    &ngg_buf->gpu_addr,
				    NULL);
	if (r) {
		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
		return r;
	}
	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);

	return r;
}

static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < NGG_BUF_MAX; i++)
		amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
				      &adev->gfx.ngg.buf[i].gpu_addr,
				      NULL);

	memset(&adev->gfx.ngg.buf[0], 0,
			sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);

	adev->gfx.ngg.init = false;

	return 0;
}

static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
{
	int r;

	if (!amdgpu_ngg || adev->gfx.ngg.init == true)
		return 0;

	/* GDS reserve memory: 64 bytes alignment */
	adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
	adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
	adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
	adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);

	/* Primitive Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
				    amdgpu_prim_buf_per_se,
				    64 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
		goto err;
	}

	/* Position Buffer */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
				    amdgpu_pos_buf_per_se,
				    256 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Position Buffer\n");
		goto err;
	}

	/* Control Sideband */
	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
				    amdgpu_cntl_sb_buf_per_se,
				    256);
	if (r) {
		dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
		goto err;
	}

	/* Parameter Cache, not created by default */
	if (amdgpu_param_buf_per_se <= 0)
		goto out;

	r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
				    amdgpu_param_buf_per_se,
				    512 * 1024);
	if (r) {
		dev_err(adev->dev, "Failed to create Parameter Cache\n");
		goto err;
	}

out:
	adev->gfx.ngg.init = true;
	return 0;
err:
	gfx_v9_0_ngg_fini(adev);
	return r;
}

static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	int r;
	u32 data, base;

	if (!amdgpu_ngg)
		return 0;

	/* Program buffer size */
	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_POS].size >> 8);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);

	data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
	data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
			     adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
	WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);

	/* Program buffer base address */
	base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base); 1554 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data); 1555 1556 base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr); 1557 data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base); 1558 WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data); 1559 1560 base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1561 data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base); 1562 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data); 1563 1564 base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr); 1565 data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base); 1566 WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data); 1567 1568 base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1569 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base); 1570 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data); 1571 1572 base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr); 1573 data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base); 1574 WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data); 1575 1576 /* Clear GDS reserved memory */ 1577 r = amdgpu_ring_alloc(ring, 17); 1578 if (r) { 1579 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", 1580 ring->name, r); 1581 return r; 1582 } 1583 1584 gfx_v9_0_write_data_to_reg(ring, 0, false, 1585 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 1586 (adev->gds.gds_size + 1587 adev->gfx.ngg.gds_reserve_size)); 1588 1589 amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); 1590 amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | 1591 PACKET3_DMA_DATA_DST_SEL(1) | 1592 PACKET3_DMA_DATA_SRC_SEL(2))); 1593 amdgpu_ring_write(ring, 0); 1594 amdgpu_ring_write(ring, 0); 1595 amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr); 1596 amdgpu_ring_write(ring, 0); 1597 amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT | 1598 adev->gfx.ngg.gds_reserve_size); 1599 1600 gfx_v9_0_write_data_to_reg(ring, 0, false, 1601 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0); 1602 1603 amdgpu_ring_commit(ring); 1604 1605 return 0; 1606 } 1607 1608 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1609 int mec, int pipe, int queue) 1610 { 1611 int r; 1612 unsigned irq_type; 1613 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1614 1615 ring = &adev->gfx.compute_ring[ring_id]; 1616 1617 /* mec0 is me1 */ 1618 ring->me = mec + 1; 1619 ring->pipe = pipe; 1620 ring->queue = queue; 1621 1622 ring->ring_obj = NULL; 1623 ring->use_doorbell = true; 1624 ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; 1625 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1626 + (ring_id * GFX9_MEC_HPD_SIZE); 1627 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1628 1629 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1630 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1631 + ring->pipe; 1632 1633 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1634 r = amdgpu_ring_init(adev, ring, 1024, 1635 &adev->gfx.eop_irq, irq_type); 1636 if (r) 1637 return r; 1638 1639 1640 return 0; 1641 } 1642 1643 static int gfx_v9_0_sw_init(void *handle) 1644 { 1645 int i, j, k, r, ring_id; 1646 struct amdgpu_ring *ring; 1647 struct amdgpu_kiq *kiq; 1648 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1649 1650 switch (adev->asic_type) { 1651 case CHIP_VEGA10: 1652 case CHIP_VEGA12: 1653 case CHIP_VEGA20: 1654 case CHIP_RAVEN: 1655 adev->gfx.mec.num_mec = 2; 1656 break; 1657 default: 1658 adev->gfx.mec.num_mec = 1; 1659 break; 1660 } 1661 1662 
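        /* all gfx9 parts use 4 pipes per MEC and 8 queues per pipe */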
adev->gfx.mec.num_pipe_per_mec = 4; 1663 adev->gfx.mec.num_queue_per_pipe = 8; 1664 1665 /* EOP Event */ 1666 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); 1667 if (r) 1668 return r; 1669 1670 /* Privileged reg */ 1671 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT, 1672 &adev->gfx.priv_reg_irq); 1673 if (r) 1674 return r; 1675 1676 /* Privileged inst */ 1677 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT, 1678 &adev->gfx.priv_inst_irq); 1679 if (r) 1680 return r; 1681 1682 /* ECC error */ 1683 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR, 1684 &adev->gfx.cp_ecc_error_irq); 1685 if (r) 1686 return r; 1687 1688 /* FUE error */ 1689 r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR, 1690 &adev->gfx.cp_ecc_error_irq); 1691 if (r) 1692 return r; 1693 1694 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1695 1696 gfx_v9_0_scratch_init(adev); 1697 1698 r = gfx_v9_0_init_microcode(adev); 1699 if (r) { 1700 DRM_ERROR("Failed to load gfx firmware!\n"); 1701 return r; 1702 } 1703 1704 r = adev->gfx.rlc.funcs->init(adev); 1705 if (r) { 1706 DRM_ERROR("Failed to init rlc BOs!\n"); 1707 return r; 1708 } 1709 1710 r = gfx_v9_0_mec_init(adev); 1711 if (r) { 1712 DRM_ERROR("Failed to init MEC BOs!\n"); 1713 return r; 1714 } 1715 1716 /* set up the gfx ring */ 1717 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1718 ring = &adev->gfx.gfx_ring[i]; 1719 ring->ring_obj = NULL; 1720 if (!i) 1721 sprintf(ring->name, "gfx"); 1722 else 1723 sprintf(ring->name, "gfx_%d", i); 1724 ring->use_doorbell = true; 1725 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; 1726 r = amdgpu_ring_init(adev, ring, 1024, 1727 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); 1728 if (r) 1729 return r; 1730 } 1731 1732 /* set up the compute queues - allocate horizontally across pipes */ 1733 ring_id = 0; 1734 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1735 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1736 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1737 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 1738 continue; 1739 1740 r = gfx_v9_0_compute_ring_init(adev, 1741 ring_id, 1742 i, k, j); 1743 if (r) 1744 return r; 1745 1746 ring_id++; 1747 } 1748 } 1749 } 1750 1751 r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); 1752 if (r) { 1753 DRM_ERROR("Failed to init KIQ BOs!\n"); 1754 return r; 1755 } 1756 1757 kiq = &adev->gfx.kiq; 1758 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 1759 if (r) 1760 return r; 1761 1762 /* create MQD for all compute queues as wel as KIQ for SRIOV case */ 1763 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); 1764 if (r) 1765 return r; 1766 1767 adev->gfx.ce_ram_size = 0x8000; 1768 1769 r = gfx_v9_0_gpu_early_init(adev); 1770 if (r) 1771 return r; 1772 1773 r = gfx_v9_0_ngg_init(adev); 1774 if (r) 1775 return r; 1776 1777 return 0; 1778 } 1779 1780 1781 static int gfx_v9_0_sw_fini(void *handle) 1782 { 1783 int i; 1784 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1785 1786 if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && 1787 adev->gfx.ras_if) { 1788 struct ras_common_if *ras_if = adev->gfx.ras_if; 1789 struct ras_ih_if ih_info = { 1790 .head = *ras_if, 1791 }; 1792 1793 amdgpu_ras_debugfs_remove(adev, ras_if); 1794 amdgpu_ras_sysfs_remove(adev, ras_if); 1795 
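                /* unhook the RAS interrupt handler and disable the GFX RAS feature before dropping ras_if */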
amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 1796 amdgpu_ras_feature_enable(adev, ras_if, 0); 1797 kfree(ras_if); 1798 } 1799 1800 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 1801 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 1802 for (i = 0; i < adev->gfx.num_compute_rings; i++) 1803 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 1804 1805 amdgpu_gfx_compute_mqd_sw_fini(adev); 1806 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 1807 amdgpu_gfx_kiq_fini(adev); 1808 1809 gfx_v9_0_mec_fini(adev); 1810 gfx_v9_0_ngg_fini(adev); 1811 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1812 if (adev->asic_type == CHIP_RAVEN) { 1813 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 1814 &adev->gfx.rlc.cp_table_gpu_addr, 1815 (void **)&adev->gfx.rlc.cp_table_ptr); 1816 } 1817 gfx_v9_0_free_microcode(adev); 1818 1819 return 0; 1820 } 1821 1822 1823 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) 1824 { 1825 /* TODO */ 1826 } 1827 1828 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) 1829 { 1830 u32 data; 1831 1832 if (instance == 0xffffffff) 1833 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 1834 else 1835 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 1836 1837 if (se_num == 0xffffffff) 1838 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 1839 else 1840 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 1841 1842 if (sh_num == 0xffffffff) 1843 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 1844 else 1845 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 1846 1847 WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data); 1848 } 1849 1850 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) 1851 { 1852 u32 data, mask; 1853 1854 data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE); 1855 data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE); 1856 1857 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 1858 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 1859 1860 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 1861 adev->gfx.config.max_sh_per_se); 1862 1863 return (~data) & mask; 1864 } 1865 1866 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) 1867 { 1868 int i, j; 1869 u32 data; 1870 u32 active_rbs = 0; 1871 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 1872 adev->gfx.config.max_sh_per_se; 1873 1874 mutex_lock(&adev->grbm_idx_mutex); 1875 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1876 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1877 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1878 data = gfx_v9_0_get_rb_active_bitmap(adev); 1879 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 1880 rb_bitmap_width_per_sh); 1881 } 1882 } 1883 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1884 mutex_unlock(&adev->grbm_idx_mutex); 1885 1886 adev->gfx.config.backend_enable_mask = active_rbs; 1887 adev->gfx.config.num_rbs = hweight32(active_rbs); 1888 } 1889 1890 #define DEFAULT_SH_MEM_BASES (0x6000) 1891 #define FIRST_COMPUTE_VMID (8) 1892 #define LAST_COMPUTE_VMID (16) 1893 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) 1894 { 1895 int i; 1896 uint32_t sh_mem_config; 1897 uint32_t sh_mem_bases; 1898 1899 /* 1900 * Configure apertures: 1901 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 1902 * Scratch: 0x60000001'00000000 - 
0x60000002'00000000 (4GB) 1903 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 1904 */ 1905 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 1906 1907 sh_mem_config = SH_MEM_ADDRESS_MODE_64 | 1908 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 1909 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; 1910 1911 mutex_lock(&adev->srbm_mutex); 1912 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 1913 soc15_grbm_select(adev, 0, 0, 0, i); 1914 /* CP and shaders */ 1915 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); 1916 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); 1917 } 1918 soc15_grbm_select(adev, 0, 0, 0, 0); 1919 mutex_unlock(&adev->srbm_mutex); 1920 } 1921 1922 static void gfx_v9_0_constants_init(struct amdgpu_device *adev) 1923 { 1924 u32 tmp; 1925 int i; 1926 1927 WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); 1928 1929 gfx_v9_0_tiling_mode_table_init(adev); 1930 1931 gfx_v9_0_setup_rb(adev); 1932 gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info); 1933 adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2); 1934 1935 /* XXX SH_MEM regs */ 1936 /* where to put LDS, scratch, GPUVM in FSA64 space */ 1937 mutex_lock(&adev->srbm_mutex); 1938 for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { 1939 soc15_grbm_select(adev, 0, 0, 0, i); 1940 /* CP and shaders */ 1941 if (i == 0) { 1942 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1943 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1944 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1945 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0); 1946 } else { 1947 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, 1948 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 1949 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp); 1950 tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE, 1951 (adev->gmc.private_aperture_start >> 48)); 1952 tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE, 1953 (adev->gmc.shared_aperture_start >> 48)); 1954 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); 1955 } 1956 } 1957 soc15_grbm_select(adev, 0, 0, 0, 0); 1958 1959 mutex_unlock(&adev->srbm_mutex); 1960 1961 gfx_v9_0_init_compute_vmid(adev); 1962 1963 mutex_lock(&adev->grbm_idx_mutex); 1964 /* 1965 * making sure that the following register writes will be broadcasted 1966 * to all the shaders 1967 */ 1968 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 1969 1970 WREG32_SOC15_RLC(GC, 0, mmPA_SC_FIFO_SIZE, 1971 (adev->gfx.config.sc_prim_fifo_size_frontend << 1972 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 1973 (adev->gfx.config.sc_prim_fifo_size_backend << 1974 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 1975 (adev->gfx.config.sc_hiz_tile_fifo_size << 1976 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 1977 (adev->gfx.config.sc_earlyz_tile_fifo_size << 1978 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 1979 mutex_unlock(&adev->grbm_idx_mutex); 1980 1981 } 1982 1983 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 1984 { 1985 u32 i, j, k; 1986 u32 mask; 1987 1988 mutex_lock(&adev->grbm_idx_mutex); 1989 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 1990 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 1991 gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); 1992 for (k = 0; k < adev->usec_timeout; k++) { 1993 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) 1994 break; 1995 udelay(1); 1996 } 1997 if (k == adev->usec_timeout) { 1998 gfx_v9_0_select_se_sh(adev, 0xffffffff, 1999 0xffffffff, 0xffffffff); 2000 mutex_unlock(&adev->grbm_idx_mutex); 2001 
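                                /* this SE/SH never reported idle: log it and give up */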
DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 2002 i, j); 2003 return; 2004 } 2005 } 2006 } 2007 gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 2008 mutex_unlock(&adev->grbm_idx_mutex); 2009 2010 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 2011 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 2012 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 2013 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 2014 for (k = 0; k < adev->usec_timeout; k++) { 2015 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 2016 break; 2017 udelay(1); 2018 } 2019 } 2020 2021 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 2022 bool enable) 2023 { 2024 u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); 2025 2026 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 2027 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 2028 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 2029 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 2030 2031 WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); 2032 } 2033 2034 static void gfx_v9_0_init_csb(struct amdgpu_device *adev) 2035 { 2036 /* csib */ 2037 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), 2038 adev->gfx.rlc.clear_state_gpu_addr >> 32); 2039 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), 2040 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 2041 WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), 2042 adev->gfx.rlc.clear_state_size); 2043 } 2044 2045 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format, 2046 int indirect_offset, 2047 int list_size, 2048 int *unique_indirect_regs, 2049 int unique_indirect_reg_count, 2050 int *indirect_start_offsets, 2051 int *indirect_start_offsets_count, 2052 int max_start_offsets_count) 2053 { 2054 int idx; 2055 2056 for (; indirect_offset < list_size; indirect_offset++) { 2057 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count); 2058 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; 2059 *indirect_start_offsets_count = *indirect_start_offsets_count + 1; 2060 2061 while (register_list_format[indirect_offset] != 0xFFFFFFFF) { 2062 indirect_offset += 2; 2063 2064 /* look for the matching indice */ 2065 for (idx = 0; idx < unique_indirect_reg_count; idx++) { 2066 if (unique_indirect_regs[idx] == 2067 register_list_format[indirect_offset] || 2068 !unique_indirect_regs[idx]) 2069 break; 2070 } 2071 2072 BUG_ON(idx >= unique_indirect_reg_count); 2073 2074 if (!unique_indirect_regs[idx]) 2075 unique_indirect_regs[idx] = register_list_format[indirect_offset]; 2076 2077 indirect_offset++; 2078 } 2079 } 2080 } 2081 2082 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev) 2083 { 2084 int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2085 int unique_indirect_reg_count = 0; 2086 2087 int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; 2088 int indirect_start_offsets_count = 0; 2089 2090 int list_size = 0; 2091 int i = 0, j = 0; 2092 u32 tmp = 0; 2093 2094 u32 *register_list_format = 2095 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 2096 if (!register_list_format) 2097 return -ENOMEM; 2098 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 2099 adev->gfx.rlc.reg_list_format_size_bytes); 2100 2101 /* setup unique_indirect_regs array and indirect_start_offsets 
array */ 2102 unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs); 2103 gfx_v9_1_parse_ind_reg_list(register_list_format, 2104 adev->gfx.rlc.reg_list_format_direct_reg_list_length, 2105 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 2106 unique_indirect_regs, 2107 unique_indirect_reg_count, 2108 indirect_start_offsets, 2109 &indirect_start_offsets_count, 2110 ARRAY_SIZE(indirect_start_offsets)); 2111 2112 /* enable auto inc in case it is disabled */ 2113 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); 2114 tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 2115 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); 2116 2117 /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ 2118 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 2119 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); 2120 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 2121 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), 2122 adev->gfx.rlc.register_restore[i]); 2123 2124 /* load indirect register */ 2125 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2126 adev->gfx.rlc.reg_list_format_start); 2127 2128 /* direct register portion */ 2129 for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++) 2130 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2131 register_list_format[i]); 2132 2133 /* indirect register portion */ 2134 while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) { 2135 if (register_list_format[i] == 0xFFFFFFFF) { 2136 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2137 continue; 2138 } 2139 2140 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2141 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]); 2142 2143 for (j = 0; j < unique_indirect_reg_count; j++) { 2144 if (register_list_format[i] == unique_indirect_regs[j]) { 2145 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j); 2146 break; 2147 } 2148 } 2149 2150 BUG_ON(j >= unique_indirect_reg_count); 2151 2152 i++; 2153 } 2154 2155 /* set save/restore list size */ 2156 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 2157 list_size = list_size >> 1; 2158 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2159 adev->gfx.rlc.reg_restore_list_size); 2160 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); 2161 2162 /* write the starting offsets to RLC scratch ram */ 2163 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), 2164 adev->gfx.rlc.starting_offsets_start); 2165 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 2166 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), 2167 indirect_start_offsets[i]); 2168 2169 /* load unique indirect regs*/ 2170 for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { 2171 if (unique_indirect_regs[i] != 0) { 2172 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) 2173 + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i], 2174 unique_indirect_regs[i] & 0x3FFFF); 2175 2176 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) 2177 + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i], 2178 unique_indirect_regs[i] >> 20); 2179 } 2180 } 2181 2182 kfree(register_list_format); 2183 return 0; 2184 } 2185 2186 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) 2187 { 2188 WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1); 2189 } 2190 2191 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, 2192 bool enable) 2193 { 2194 uint32_t data = 0; 2195 uint32_t default_data = 0; 2196 2197 default_data = data = 
RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
        if (enable) {
                /* enable GFXIP control over CGPG */
                data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
                if (default_data != data)
                        WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);

                /* update status */
                data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
                data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
                if (default_data != data)
                        WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
        } else {
                /* restore GFXIP control over CGPG */
                data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
                if (default_data != data)
                        WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
        }
}

static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
{
        uint32_t data = 0;

        if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
                              AMD_PG_SUPPORT_GFX_SMG |
                              AMD_PG_SUPPORT_GFX_DMG)) {
                /* init IDLE_POLL_COUNT = 60 */
                data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
                data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
                data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
                WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);

                /* init RLC PG Delay */
                data = 0;
                data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
                data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
                data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
                data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);

                data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
                data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
                data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);

                data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
                data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
                data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);

                data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
                data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;

                /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
                data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);

                pwr_10_0_gfxip_control_over_cgpg(adev, true);
        }
}

static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
                                                       bool enable)
{
        uint32_t data = 0;
        uint32_t default_data = 0;

        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
        data = REG_SET_FIELD(data, RLC_PG_CNTL,
                             SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
                             enable ? 1 : 0);
        if (default_data != data)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
}

static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
                                                         bool enable)
{
        uint32_t data = 0;
        uint32_t default_data = 0;

        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
        data = REG_SET_FIELD(data, RLC_PG_CNTL,
                             SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
                             enable ? 1 : 0);
        if (default_data != data)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
}

static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
                                            bool enable)
{
        uint32_t data = 0;
        uint32_t default_data = 0;

        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
        data = REG_SET_FIELD(data, RLC_PG_CNTL,
                             CP_PG_DISABLE,
                             enable ? 0 : 1);
        if (default_data != data)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
}

static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
                                                bool enable)
{
        uint32_t data, default_data;

        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
        data = REG_SET_FIELD(data, RLC_PG_CNTL,
                             GFX_POWER_GATING_ENABLE,
                             enable ? 1 : 0);
        if (default_data != data)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
}

static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
                                                     bool enable)
{
        uint32_t data, default_data;

        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
        data = REG_SET_FIELD(data, RLC_PG_CNTL,
                             GFX_PIPELINE_PG_ENABLE,
                             enable ? 1 : 0);
        if (default_data != data)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);

        if (!enable)
                /* read any GFX register to wake up GFX */
                data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
}

static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
                                                        bool enable)
{
        uint32_t data, default_data;

        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
        data = REG_SET_FIELD(data, RLC_PG_CNTL,
                             STATIC_PER_CU_PG_ENABLE,
                             enable ? 1 : 0);
        if (default_data != data)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
}

static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
                                                         bool enable)
{
        uint32_t data, default_data;

        default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
        data = REG_SET_FIELD(data, RLC_PG_CNTL,
                             DYN_PER_CU_PG_ENABLE,
                             enable ? 1 : 0);
        if (default_data != data)
                WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
}

static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
{
        gfx_v9_0_init_csb(adev);

        /*
         * The RLC save/restore list is only available since RLC v2_1,
         * and it is needed by the gfxoff feature.
2364 */ 2365 if (adev->gfx.rlc.is_rlc_v2_1) { 2366 gfx_v9_1_init_rlc_save_restore_list(adev); 2367 gfx_v9_0_enable_save_restore_machine(adev); 2368 } 2369 2370 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 2371 AMD_PG_SUPPORT_GFX_SMG | 2372 AMD_PG_SUPPORT_GFX_DMG | 2373 AMD_PG_SUPPORT_CP | 2374 AMD_PG_SUPPORT_GDS | 2375 AMD_PG_SUPPORT_RLC_SMU_HS)) { 2376 WREG32(mmRLC_JUMP_TABLE_RESTORE, 2377 adev->gfx.rlc.cp_table_gpu_addr >> 8); 2378 gfx_v9_0_init_gfx_power_gating(adev); 2379 } 2380 } 2381 2382 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) 2383 { 2384 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); 2385 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 2386 gfx_v9_0_wait_for_rlc_serdes(adev); 2387 } 2388 2389 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev) 2390 { 2391 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 2392 udelay(50); 2393 WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 2394 udelay(50); 2395 } 2396 2397 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) 2398 { 2399 #ifdef AMDGPU_RLC_DEBUG_RETRY 2400 u32 rlc_ucode_ver; 2401 #endif 2402 2403 WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); 2404 udelay(50); 2405 2406 /* carrizo do enable cp interrupt after cp inited */ 2407 if (!(adev->flags & AMD_IS_APU)) { 2408 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 2409 udelay(50); 2410 } 2411 2412 #ifdef AMDGPU_RLC_DEBUG_RETRY 2413 /* RLC_GPM_GENERAL_6 : RLC Ucode version */ 2414 rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6); 2415 if(rlc_ucode_ver == 0x108) { 2416 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 ==0x08%x / fw_ver == %i \n", 2417 rlc_ucode_ver, adev->gfx.rlc_fw_version); 2418 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles, 2419 * default is 0x9C4 to create a 100us interval */ 2420 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); 2421 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr 2422 * to disable the page fault retry interrupts, default is 2423 * 0x100 (256) */ 2424 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); 2425 } 2426 #endif 2427 } 2428 2429 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev) 2430 { 2431 const struct rlc_firmware_header_v2_0 *hdr; 2432 const __le32 *fw_data; 2433 unsigned i, fw_size; 2434 2435 if (!adev->gfx.rlc_fw) 2436 return -EINVAL; 2437 2438 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 2439 amdgpu_ucode_print_rlc_hdr(&hdr->header); 2440 2441 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 2442 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 2443 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 2444 2445 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, 2446 RLCG_UCODE_LOADING_START_ADDRESS); 2447 for (i = 0; i < fw_size; i++) 2448 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 2449 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 2450 2451 return 0; 2452 } 2453 2454 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) 2455 { 2456 int r; 2457 2458 if (amdgpu_sriov_vf(adev)) { 2459 gfx_v9_0_init_csb(adev); 2460 return 0; 2461 } 2462 2463 adev->gfx.rlc.funcs->stop(adev); 2464 2465 /* disable CG */ 2466 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); 2467 2468 gfx_v9_0_init_pg(adev); 2469 2470 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 2471 /* legacy rlc firmware loading */ 2472 r = gfx_v9_0_rlc_load_microcode(adev); 2473 if (r) 2474 return r; 2475 } 2476 2477 switch (adev->asic_type) { 2478 case CHIP_RAVEN: 2479 if (amdgpu_lbpw == 0) 2480 
gfx_v9_0_enable_lbpw(adev, false); 2481 else 2482 gfx_v9_0_enable_lbpw(adev, true); 2483 break; 2484 case CHIP_VEGA20: 2485 if (amdgpu_lbpw > 0) 2486 gfx_v9_0_enable_lbpw(adev, true); 2487 else 2488 gfx_v9_0_enable_lbpw(adev, false); 2489 break; 2490 default: 2491 break; 2492 } 2493 2494 adev->gfx.rlc.funcs->start(adev); 2495 2496 return 0; 2497 } 2498 2499 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 2500 { 2501 int i; 2502 u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); 2503 2504 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1); 2505 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); 2506 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); 2507 if (!enable) { 2508 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2509 adev->gfx.gfx_ring[i].sched.ready = false; 2510 } 2511 WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); 2512 udelay(50); 2513 } 2514 2515 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 2516 { 2517 const struct gfx_firmware_header_v1_0 *pfp_hdr; 2518 const struct gfx_firmware_header_v1_0 *ce_hdr; 2519 const struct gfx_firmware_header_v1_0 *me_hdr; 2520 const __le32 *fw_data; 2521 unsigned i, fw_size; 2522 2523 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 2524 return -EINVAL; 2525 2526 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 2527 adev->gfx.pfp_fw->data; 2528 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 2529 adev->gfx.ce_fw->data; 2530 me_hdr = (const struct gfx_firmware_header_v1_0 *) 2531 adev->gfx.me_fw->data; 2532 2533 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 2534 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 2535 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 2536 2537 gfx_v9_0_cp_gfx_enable(adev, false); 2538 2539 /* PFP */ 2540 fw_data = (const __le32 *) 2541 (adev->gfx.pfp_fw->data + 2542 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 2543 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 2544 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0); 2545 for (i = 0; i < fw_size; i++) 2546 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 2547 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 2548 2549 /* CE */ 2550 fw_data = (const __le32 *) 2551 (adev->gfx.ce_fw->data + 2552 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 2553 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 2554 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0); 2555 for (i = 0; i < fw_size; i++) 2556 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 2557 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 2558 2559 /* ME */ 2560 fw_data = (const __le32 *) 2561 (adev->gfx.me_fw->data + 2562 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 2563 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 2564 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0); 2565 for (i = 0; i < fw_size; i++) 2566 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 2567 WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 2568 2569 return 0; 2570 } 2571 2572 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) 2573 { 2574 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 2575 const struct cs_section_def *sect = NULL; 2576 const struct cs_extent_def *ext = NULL; 2577 int r, i, tmp; 2578 2579 /* init the CP */ 2580 WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 2581 WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1); 2582 2583 gfx_v9_0_cp_gfx_enable(adev, true); 2584 2585 r 
= amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); 2586 if (r) { 2587 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 2588 return r; 2589 } 2590 2591 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2592 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 2593 2594 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 2595 amdgpu_ring_write(ring, 0x80000000); 2596 amdgpu_ring_write(ring, 0x80000000); 2597 2598 for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { 2599 for (ext = sect->section; ext->extent != NULL; ++ext) { 2600 if (sect->id == SECT_CONTEXT) { 2601 amdgpu_ring_write(ring, 2602 PACKET3(PACKET3_SET_CONTEXT_REG, 2603 ext->reg_count)); 2604 amdgpu_ring_write(ring, 2605 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 2606 for (i = 0; i < ext->reg_count; i++) 2607 amdgpu_ring_write(ring, ext->extent[i]); 2608 } 2609 } 2610 } 2611 2612 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 2613 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 2614 2615 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 2616 amdgpu_ring_write(ring, 0); 2617 2618 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 2619 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 2620 amdgpu_ring_write(ring, 0x8000); 2621 amdgpu_ring_write(ring, 0x8000); 2622 2623 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG,1)); 2624 tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE | 2625 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START)); 2626 amdgpu_ring_write(ring, tmp); 2627 amdgpu_ring_write(ring, 0); 2628 2629 amdgpu_ring_commit(ring); 2630 2631 return 0; 2632 } 2633 2634 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) 2635 { 2636 struct amdgpu_ring *ring; 2637 u32 tmp; 2638 u32 rb_bufsz; 2639 u64 rb_addr, rptr_addr, wptr_gpu_addr; 2640 2641 /* Set the write pointer delay */ 2642 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0); 2643 2644 /* set the RB to use vmid 0 */ 2645 WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0); 2646 2647 /* Set ring buffer size */ 2648 ring = &adev->gfx.gfx_ring[0]; 2649 rb_bufsz = order_base_2(ring->ring_size / 8); 2650 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 2651 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 2652 #ifdef __BIG_ENDIAN 2653 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 2654 #endif 2655 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2656 2657 /* Initialize the ring buffer's write pointers */ 2658 ring->wptr = 0; 2659 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 2660 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 2661 2662 /* set the wb address wether it's enabled or not */ 2663 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2664 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 2665 WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK); 2666 2667 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2668 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 2669 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 2670 2671 mdelay(1); 2672 WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp); 2673 2674 rb_addr = ring->gpu_addr >> 8; 2675 WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr); 2676 WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 2677 2678 tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); 2679 if (ring->use_doorbell) { 2680 tmp = 
REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2681 DOORBELL_OFFSET, ring->doorbell_index); 2682 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 2683 DOORBELL_EN, 1); 2684 } else { 2685 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 2686 } 2687 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); 2688 2689 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 2690 DOORBELL_RANGE_LOWER, ring->doorbell_index); 2691 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 2692 2693 WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER, 2694 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 2695 2696 2697 /* start the ring */ 2698 gfx_v9_0_cp_gfx_start(adev); 2699 ring->sched.ready = true; 2700 2701 return 0; 2702 } 2703 2704 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 2705 { 2706 int i; 2707 2708 if (enable) { 2709 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0); 2710 } else { 2711 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 2712 (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 2713 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2714 adev->gfx.compute_ring[i].sched.ready = false; 2715 adev->gfx.kiq.ring.sched.ready = false; 2716 } 2717 udelay(50); 2718 } 2719 2720 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) 2721 { 2722 const struct gfx_firmware_header_v1_0 *mec_hdr; 2723 const __le32 *fw_data; 2724 unsigned i; 2725 u32 tmp; 2726 2727 if (!adev->gfx.mec_fw) 2728 return -EINVAL; 2729 2730 gfx_v9_0_cp_compute_enable(adev, false); 2731 2732 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 2733 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 2734 2735 fw_data = (const __le32 *) 2736 (adev->gfx.mec_fw->data + 2737 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 2738 tmp = 0; 2739 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0); 2740 tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); 2741 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp); 2742 2743 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO, 2744 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); 2745 WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, 2746 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); 2747 2748 /* MEC1 */ 2749 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2750 mec_hdr->jt_offset); 2751 for (i = 0; i < mec_hdr->jt_size; i++) 2752 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA, 2753 le32_to_cpup(fw_data + mec_hdr->jt_offset + i)); 2754 2755 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, 2756 adev->gfx.mec_fw_version); 2757 /* Todo : Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. 
*/ 2758 2759 return 0; 2760 } 2761 2762 /* KIQ functions */ 2763 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) 2764 { 2765 uint32_t tmp; 2766 struct amdgpu_device *adev = ring->adev; 2767 2768 /* tell RLC which is KIQ queue */ 2769 tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); 2770 tmp &= 0xffffff00; 2771 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 2772 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2773 tmp |= 0x80; 2774 WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); 2775 } 2776 2777 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) 2778 { 2779 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 2780 uint64_t queue_mask = 0; 2781 int r, i; 2782 2783 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 2784 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 2785 continue; 2786 2787 /* This situation may be hit in the future if a new HW 2788 * generation exposes more than 64 queues. If so, the 2789 * definition of queue_mask needs updating */ 2790 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 2791 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 2792 break; 2793 } 2794 2795 queue_mask |= (1ull << i); 2796 } 2797 2798 r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); 2799 if (r) { 2800 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 2801 return r; 2802 } 2803 2804 /* set resources */ 2805 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 2806 amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | 2807 PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ 2808 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 2809 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 2810 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 2811 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 2812 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 2813 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 2814 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2815 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 2816 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 2817 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2818 2819 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 2820 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 2821 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 2822 PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ 2823 PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ 2824 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 2825 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 2826 PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | 2827 PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ 2828 PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */ 2829 PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ 2830 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ 2831 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); 2832 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 2833 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 2834 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 2835 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 2836 } 2837 2838 r = amdgpu_ring_test_helper(kiq_ring); 2839 if (r) 2840 DRM_ERROR("KCQ enable failed\n"); 2841 2842 return r; 2843 } 2844 2845 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) 2846 { 2847 struct amdgpu_device *adev = ring->adev; 2848 struct v9_mqd *mqd = ring->mqd_ptr; 2849 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 2850 uint32_t tmp; 2851 2852 mqd->header = 0xC0310800; 2853 mqd->compute_pipelinestat_enable = 0x00000001; 2854 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 2855 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 2856 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 2857 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 2858 mqd->compute_misc_reserved = 0x00000003; 2859 2860 mqd->dynamic_cu_mask_addr_lo = 2861 lower_32_bits(ring->mqd_gpu_addr 2862 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2863 mqd->dynamic_cu_mask_addr_hi = 2864 upper_32_bits(ring->mqd_gpu_addr 2865 + offsetof(struct v9_mqd_allocation, dynamic_cu_mask)); 2866 2867 eop_base_addr = ring->eop_gpu_addr >> 8; 2868 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 2869 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 2870 2871 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2872 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); 2873 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 2874 (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); 2875 2876 mqd->cp_hqd_eop_control = tmp; 2877 2878 /* enable doorbell? 
*/ 2879 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2880 2881 if (ring->use_doorbell) { 2882 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2883 DOORBELL_OFFSET, ring->doorbell_index); 2884 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2885 DOORBELL_EN, 1); 2886 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2887 DOORBELL_SOURCE, 0); 2888 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2889 DOORBELL_HIT, 0); 2890 } else { 2891 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2892 DOORBELL_EN, 0); 2893 } 2894 2895 mqd->cp_hqd_pq_doorbell_control = tmp; 2896 2897 /* disable the queue if it's active */ 2898 ring->wptr = 0; 2899 mqd->cp_hqd_dequeue_request = 0; 2900 mqd->cp_hqd_pq_rptr = 0; 2901 mqd->cp_hqd_pq_wptr_lo = 0; 2902 mqd->cp_hqd_pq_wptr_hi = 0; 2903 2904 /* set the pointer to the MQD */ 2905 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 2906 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 2907 2908 /* set MQD vmid to 0 */ 2909 tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); 2910 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 2911 mqd->cp_mqd_control = tmp; 2912 2913 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 2914 hqd_gpu_addr = ring->gpu_addr >> 8; 2915 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 2916 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 2917 2918 /* set up the HQD, this is similar to CP_RB0_CNTL */ 2919 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); 2920 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 2921 (order_base_2(ring->ring_size / 4) - 1)); 2922 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 2923 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 2924 #ifdef __BIG_ENDIAN 2925 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 2926 #endif 2927 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 2928 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 2929 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 2930 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 2931 mqd->cp_hqd_pq_control = tmp; 2932 2933 /* set the wb address whether it's enabled or not */ 2934 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 2935 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 2936 mqd->cp_hqd_pq_rptr_report_addr_hi = 2937 upper_32_bits(wb_gpu_addr) & 0xffff; 2938 2939 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 2940 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 2941 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 2942 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 2943 2944 tmp = 0; 2945 /* enable the doorbell if requested */ 2946 if (ring->use_doorbell) { 2947 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); 2948 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2949 DOORBELL_OFFSET, ring->doorbell_index); 2950 2951 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2952 DOORBELL_EN, 1); 2953 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2954 DOORBELL_SOURCE, 0); 2955 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 2956 DOORBELL_HIT, 0); 2957 } 2958 2959 mqd->cp_hqd_pq_doorbell_control = tmp; 2960 2961 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 2962 ring->wptr = 0; 2963 mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR); 2964 2965 /* set the vmid for the queue */ 2966 mqd->cp_hqd_vmid = 0; 2967 2968 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); 2969 
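        /* keep the persistent-state defaults but override the preload size below */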
tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 2970 mqd->cp_hqd_persistent_state = tmp; 2971 2972 /* set MIN_IB_AVAIL_SIZE */ 2973 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL); 2974 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 2975 mqd->cp_hqd_ib_control = tmp; 2976 2977 /* activate the queue */ 2978 mqd->cp_hqd_active = 1; 2979 2980 return 0; 2981 } 2982 2983 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) 2984 { 2985 struct amdgpu_device *adev = ring->adev; 2986 struct v9_mqd *mqd = ring->mqd_ptr; 2987 int j; 2988 2989 /* disable wptr polling */ 2990 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 2991 2992 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR, 2993 mqd->cp_hqd_eop_base_addr_lo); 2994 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, 2995 mqd->cp_hqd_eop_base_addr_hi); 2996 2997 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 2998 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL, 2999 mqd->cp_hqd_eop_control); 3000 3001 /* enable doorbell? */ 3002 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3003 mqd->cp_hqd_pq_doorbell_control); 3004 3005 /* disable the queue if it's active */ 3006 if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { 3007 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); 3008 for (j = 0; j < adev->usec_timeout; j++) { 3009 if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) 3010 break; 3011 udelay(1); 3012 } 3013 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 3014 mqd->cp_hqd_dequeue_request); 3015 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 3016 mqd->cp_hqd_pq_rptr); 3017 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3018 mqd->cp_hqd_pq_wptr_lo); 3019 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 3020 mqd->cp_hqd_pq_wptr_hi); 3021 } 3022 3023 /* set the pointer to the MQD */ 3024 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR, 3025 mqd->cp_mqd_base_addr_lo); 3026 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI, 3027 mqd->cp_mqd_base_addr_hi); 3028 3029 /* set MQD vmid to 0 */ 3030 WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL, 3031 mqd->cp_mqd_control); 3032 3033 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 3034 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE, 3035 mqd->cp_hqd_pq_base_lo); 3036 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI, 3037 mqd->cp_hqd_pq_base_hi); 3038 3039 /* set up the HQD, this is similar to CP_RB0_CNTL */ 3040 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL, 3041 mqd->cp_hqd_pq_control); 3042 3043 /* set the wb address whether it's enabled or not */ 3044 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, 3045 mqd->cp_hqd_pq_rptr_report_addr_lo); 3046 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 3047 mqd->cp_hqd_pq_rptr_report_addr_hi); 3048 3049 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 3050 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, 3051 mqd->cp_hqd_pq_wptr_poll_addr_lo); 3052 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 3053 mqd->cp_hqd_pq_wptr_poll_addr_hi); 3054 3055 /* enable the doorbell if requested */ 3056 if (ring->use_doorbell) { 3057 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3058 (adev->doorbell_index.kiq * 2) << 2); 3059 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3060 (adev->doorbell_index.userqueue_end * 2) << 2); 3061 } 3062 3063 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 3064 mqd->cp_hqd_pq_doorbell_control); 3065 3066 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 3067 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 3068 
                         mqd->cp_hqd_pq_wptr_lo);
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
                         mqd->cp_hqd_pq_wptr_hi);

        /* set the vmid for the queue */
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);

        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
                         mqd->cp_hqd_persistent_state);

        /* activate the queue */
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
                         mqd->cp_hqd_active);

        if (ring->use_doorbell)
                WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);

        return 0;
}

static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        int j;

        /* disable the queue if it's active */
        if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {

                WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);

                for (j = 0; j < adev->usec_timeout; j++) {
                        if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
                                break;
                        udelay(1);
                }

                /* check against the same bound the polling loop above used */
                if (j == adev->usec_timeout) {
                        DRM_DEBUG("KIQ dequeue request failed.\n");

                        /* Manual disable if dequeue request times out */
                        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
                }

                WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0);
        }

        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
        WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);

        return 0;
}

static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct v9_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

        gfx_v9_0_kiq_setting(ring);

        if (adev->in_gpu_reset) { /* for GPU_RESET case */
                /* reset MQD to a clean status */
                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));

                /* reset ring buffer */
                ring->wptr = 0;
                amdgpu_ring_clear_ring(ring);

                mutex_lock(&adev->srbm_mutex);
                soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v9_0_kiq_init_register(ring);
                soc15_grbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);
        } else {
                memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
                ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
                ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
                mutex_lock(&adev->srbm_mutex);
                soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
                gfx_v9_0_mqd_init(ring);
                gfx_v9_0_kiq_init_register(ring);
                soc15_grbm_select(adev, 0, 0, 0, 0);
                mutex_unlock(&adev->srbm_mutex);

                if (adev->gfx.mec.mqd_backup[mqd_idx])
                        memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
        }

        return 0;
}

static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = ring->adev;
        struct v9_mqd *mqd = ring->mqd_ptr;
        int mqd_idx = ring - &adev->gfx.compute_ring[0];

        if (!adev->in_gpu_reset && !adev->in_suspend) {
                memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
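                /* fresh MQD: start with all CUs and RBs enabled */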
((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 3176 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 3177 mutex_lock(&adev->srbm_mutex); 3178 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 3179 gfx_v9_0_mqd_init(ring); 3180 soc15_grbm_select(adev, 0, 0, 0, 0); 3181 mutex_unlock(&adev->srbm_mutex); 3182 3183 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3184 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); 3185 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 3186 /* reset MQD to a clean status */ 3187 if (adev->gfx.mec.mqd_backup[mqd_idx]) 3188 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); 3189 3190 /* reset ring buffer */ 3191 ring->wptr = 0; 3192 amdgpu_ring_clear_ring(ring); 3193 } else { 3194 amdgpu_ring_clear_ring(ring); 3195 } 3196 3197 return 0; 3198 } 3199 3200 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) 3201 { 3202 struct amdgpu_ring *ring; 3203 int r; 3204 3205 ring = &adev->gfx.kiq.ring; 3206 3207 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3208 if (unlikely(r != 0)) 3209 return r; 3210 3211 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3212 if (unlikely(r != 0)) 3213 return r; 3214 3215 gfx_v9_0_kiq_init_queue(ring); 3216 amdgpu_bo_kunmap(ring->mqd_obj); 3217 ring->mqd_ptr = NULL; 3218 amdgpu_bo_unreserve(ring->mqd_obj); 3219 ring->sched.ready = true; 3220 return 0; 3221 } 3222 3223 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) 3224 { 3225 struct amdgpu_ring *ring = NULL; 3226 int r = 0, i; 3227 3228 gfx_v9_0_cp_compute_enable(adev, true); 3229 3230 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3231 ring = &adev->gfx.compute_ring[i]; 3232 3233 r = amdgpu_bo_reserve(ring->mqd_obj, false); 3234 if (unlikely(r != 0)) 3235 goto done; 3236 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); 3237 if (!r) { 3238 r = gfx_v9_0_kcq_init_queue(ring); 3239 amdgpu_bo_kunmap(ring->mqd_obj); 3240 ring->mqd_ptr = NULL; 3241 } 3242 amdgpu_bo_unreserve(ring->mqd_obj); 3243 if (r) 3244 goto done; 3245 } 3246 3247 r = gfx_v9_0_kiq_kcq_enable(adev); 3248 done: 3249 return r; 3250 } 3251 3252 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) 3253 { 3254 int r, i; 3255 struct amdgpu_ring *ring; 3256 3257 if (!(adev->flags & AMD_IS_APU)) 3258 gfx_v9_0_enable_gui_idle_interrupt(adev, false); 3259 3260 if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { 3261 /* legacy firmware loading */ 3262 r = gfx_v9_0_cp_gfx_load_microcode(adev); 3263 if (r) 3264 return r; 3265 3266 r = gfx_v9_0_cp_compute_load_microcode(adev); 3267 if (r) 3268 return r; 3269 } 3270 3271 r = gfx_v9_0_kiq_resume(adev); 3272 if (r) 3273 return r; 3274 3275 r = gfx_v9_0_cp_gfx_resume(adev); 3276 if (r) 3277 return r; 3278 3279 r = gfx_v9_0_kcq_resume(adev); 3280 if (r) 3281 return r; 3282 3283 ring = &adev->gfx.gfx_ring[0]; 3284 r = amdgpu_ring_test_helper(ring); 3285 if (r) 3286 return r; 3287 3288 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3289 ring = &adev->gfx.compute_ring[i]; 3290 amdgpu_ring_test_helper(ring); 3291 } 3292 3293 gfx_v9_0_enable_gui_idle_interrupt(adev, true); 3294 3295 return 0; 3296 } 3297 3298 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) 3299 { 3300 gfx_v9_0_cp_gfx_enable(adev, enable); 3301 gfx_v9_0_cp_compute_enable(adev, enable); 3302 } 3303 3304 static int gfx_v9_0_hw_init(void *handle) 3305 { 3306 int r; 3307 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3308 3309 
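        /* apply golden register settings before bringing up the RLC and CP */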
gfx_v9_0_init_golden_registers(adev); 3310 3311 gfx_v9_0_constants_init(adev); 3312 3313 r = gfx_v9_0_csb_vram_pin(adev); 3314 if (r) 3315 return r; 3316 3317 r = adev->gfx.rlc.funcs->resume(adev); 3318 if (r) 3319 return r; 3320 3321 r = gfx_v9_0_cp_resume(adev); 3322 if (r) 3323 return r; 3324 3325 r = gfx_v9_0_ngg_en(adev); 3326 if (r) 3327 return r; 3328 3329 return r; 3330 } 3331 3332 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) 3333 { 3334 int r, i; 3335 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 3336 3337 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 3338 if (r) 3339 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 3340 3341 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 3342 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 3343 3344 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 3345 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 3346 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 3347 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 3348 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 3349 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 3350 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 3351 amdgpu_ring_write(kiq_ring, 0); 3352 amdgpu_ring_write(kiq_ring, 0); 3353 amdgpu_ring_write(kiq_ring, 0); 3354 } 3355 r = amdgpu_ring_test_helper(kiq_ring); 3356 if (r) 3357 DRM_ERROR("KCQ disable failed\n"); 3358 3359 return r; 3360 } 3361 3362 static int gfx_v9_0_hw_fini(void *handle) 3363 { 3364 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3365 3366 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 3367 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 3368 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 3369 3370 /* disable KCQ to avoid CPC touch memory not valid anymore */ 3371 gfx_v9_0_kcq_disable(adev); 3372 3373 if (amdgpu_sriov_vf(adev)) { 3374 gfx_v9_0_cp_gfx_enable(adev, false); 3375 /* must disable polling for SRIOV when hw finished, otherwise 3376 * CPC engine may still keep fetching WB address which is already 3377 * invalid after sw finished and trigger DMAR reading error in 3378 * hypervisor side. 
3379 */ 3380 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); 3381 return 0; 3382 } 3383 3384 /* Use deinitialize sequence from CAIL when unbinding device from driver, 3385 * otherwise KIQ is hanging when binding back 3386 */ 3387 if (!adev->in_gpu_reset && !adev->in_suspend) { 3388 mutex_lock(&adev->srbm_mutex); 3389 soc15_grbm_select(adev, adev->gfx.kiq.ring.me, 3390 adev->gfx.kiq.ring.pipe, 3391 adev->gfx.kiq.ring.queue, 0); 3392 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); 3393 soc15_grbm_select(adev, 0, 0, 0, 0); 3394 mutex_unlock(&adev->srbm_mutex); 3395 } 3396 3397 gfx_v9_0_cp_enable(adev, false); 3398 adev->gfx.rlc.funcs->stop(adev); 3399 3400 gfx_v9_0_csb_vram_unpin(adev); 3401 3402 return 0; 3403 } 3404 3405 static int gfx_v9_0_suspend(void *handle) 3406 { 3407 return gfx_v9_0_hw_fini(handle); 3408 } 3409 3410 static int gfx_v9_0_resume(void *handle) 3411 { 3412 return gfx_v9_0_hw_init(handle); 3413 } 3414 3415 static bool gfx_v9_0_is_idle(void *handle) 3416 { 3417 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3418 3419 if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), 3420 GRBM_STATUS, GUI_ACTIVE)) 3421 return false; 3422 else 3423 return true; 3424 } 3425 3426 static int gfx_v9_0_wait_for_idle(void *handle) 3427 { 3428 unsigned i; 3429 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3430 3431 for (i = 0; i < adev->usec_timeout; i++) { 3432 if (gfx_v9_0_is_idle(handle)) 3433 return 0; 3434 udelay(1); 3435 } 3436 return -ETIMEDOUT; 3437 } 3438 3439 static int gfx_v9_0_soft_reset(void *handle) 3440 { 3441 u32 grbm_soft_reset = 0; 3442 u32 tmp; 3443 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3444 3445 /* GRBM_STATUS */ 3446 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS); 3447 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 3448 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 3449 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 3450 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 3451 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 3452 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 3453 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3454 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3455 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3456 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 3457 } 3458 3459 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 3460 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3461 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 3462 } 3463 3464 /* GRBM_STATUS2 */ 3465 tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); 3466 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 3467 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 3468 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3469 3470 3471 if (grbm_soft_reset) { 3472 /* stop the rlc */ 3473 adev->gfx.rlc.funcs->stop(adev); 3474 3475 /* Disable GFX parsing/prefetching */ 3476 gfx_v9_0_cp_gfx_enable(adev, false); 3477 3478 /* Disable MEC parsing/prefetching */ 3479 gfx_v9_0_cp_compute_enable(adev, false); 3480 3481 if (grbm_soft_reset) { 3482 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3483 tmp |= grbm_soft_reset; 3484 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 3485 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3486 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3487 3488 udelay(50); 3489 3490 tmp &= ~grbm_soft_reset; 3491 WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp); 3492 tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET); 3493 } 3494 3495 /* Wait a little for things to settle down */ 3496 
udelay(50); 3497 } 3498 return 0; 3499 } 3500 3501 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) 3502 { 3503 uint64_t clock; 3504 3505 mutex_lock(&adev->gfx.gpu_clock_mutex); 3506 WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 3507 clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | 3508 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 3509 mutex_unlock(&adev->gfx.gpu_clock_mutex); 3510 return clock; 3511 } 3512 3513 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 3514 uint32_t vmid, 3515 uint32_t gds_base, uint32_t gds_size, 3516 uint32_t gws_base, uint32_t gws_size, 3517 uint32_t oa_base, uint32_t oa_size) 3518 { 3519 struct amdgpu_device *adev = ring->adev; 3520 3521 /* GDS Base */ 3522 gfx_v9_0_write_data_to_reg(ring, 0, false, 3523 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, 3524 gds_base); 3525 3526 /* GDS Size */ 3527 gfx_v9_0_write_data_to_reg(ring, 0, false, 3528 SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid, 3529 gds_size); 3530 3531 /* GWS */ 3532 gfx_v9_0_write_data_to_reg(ring, 0, false, 3533 SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid, 3534 gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 3535 3536 /* OA */ 3537 gfx_v9_0_write_data_to_reg(ring, 0, false, 3538 SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid, 3539 (1 << (oa_size + oa_base)) - (1 << oa_base)); 3540 } 3541 3542 static const u32 vgpr_init_compute_shader[] = 3543 { 3544 0xb07c0000, 0xbe8000ff, 3545 0x000000f8, 0xbf110800, 3546 0x7e000280, 0x7e020280, 3547 0x7e040280, 0x7e060280, 3548 0x7e080280, 0x7e0a0280, 3549 0x7e0c0280, 0x7e0e0280, 3550 0x80808800, 0xbe803200, 3551 0xbf84fff5, 0xbf9c0000, 3552 0xd28c0001, 0x0001007f, 3553 0xd28d0001, 0x0002027e, 3554 0x10020288, 0xb8810904, 3555 0xb7814000, 0xd1196a01, 3556 0x00000301, 0xbe800087, 3557 0xbefc00c1, 0xd89c4000, 3558 0x00020201, 0xd89cc080, 3559 0x00040401, 0x320202ff, 3560 0x00000800, 0x80808100, 3561 0xbf84fff8, 0x7e020280, 3562 0xbf810000, 0x00000000, 3563 }; 3564 3565 static const u32 sgpr_init_compute_shader[] = 3566 { 3567 0xb07c0000, 0xbe8000ff, 3568 0x0000005f, 0xbee50080, 3569 0xbe812c65, 0xbe822c65, 3570 0xbe832c65, 0xbe842c65, 3571 0xbe852c65, 0xb77c0005, 3572 0x80808500, 0xbf84fff8, 3573 0xbe800080, 0xbf810000, 3574 }; 3575 3576 static const struct soc15_reg_entry vgpr_init_regs[] = { 3577 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3578 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3579 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3580 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, 3581 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3582 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3583 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3584 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3585 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ 3586 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 }, /* 64KB LDS */ 3587 }; 3588 3589 static const struct soc15_reg_entry sgpr_init_regs[] = { 3590 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, 3591 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, 3592 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, 3593 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 
0xffffffff }, 3594 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */ 3595 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, 3596 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, 3597 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, 3598 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ 3599 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, 3600 }; 3601 3602 static const struct soc15_reg_entry sec_ded_counter_registers[] = { 3603 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT) }, 3604 { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT) }, 3605 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT) }, 3606 { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT) }, 3607 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT) }, 3608 { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT) }, 3609 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT) }, 3610 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT) }, 3611 { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT) }, 3612 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT) }, 3613 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT) }, 3614 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED) }, 3615 { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT) }, 3616 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT) }, 3617 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT) }, 3618 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO) }, 3619 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT) }, 3620 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT) }, 3621 { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT) }, 3622 { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT) }, 3623 { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT) }, 3624 { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2) }, 3625 { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT) }, 3626 { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT) }, 3627 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT) }, 3628 { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT) }, 3629 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT) }, 3630 { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2) }, 3631 { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT) }, 3632 { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2) }, 3633 { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT) }, 3634 }; 3635 3636 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 3637 { 3638 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 3639 struct amdgpu_ib ib; 3640 struct dma_fence *f = NULL; 3641 int r, i, j; 3642 unsigned total_size, vgpr_offset, sgpr_offset; 3643 u64 gpu_addr; 3644 3645 /* only support when RAS is enabled */ 3646 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) 3647 return 0; 3648 3649 /* bail if the compute ring is not ready */ 3650 if (!ring->sched.ready) 3651 return 0; 3652 3653 total_size = 3654 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3655 total_size += 3656 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; 3657 total_size = ALIGN(total_size, 256); 3658 vgpr_offset = total_size; 3659 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 3660 sgpr_offset = total_size; 3661 total_size += sizeof(sgpr_init_compute_shader); 3662 3663 /* allocate an indirect buffer to put the commands in */ 3664 memset(&ib, 0, sizeof(ib)); 3665 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 3666 if (r) { 3667 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 3668 return r; 3669 } 3670 3671 /* load the compute shaders */ 3672 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 3673 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 3674 3675 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 3676 ib.ptr[i + (sgpr_offset / 4)] = 
sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
						- PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
					- PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 128; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
						- PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
					- PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 128; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* read back registers to clear the counters */
	mutex_lock(&adev->grbm_idx_mutex);
	for (j = 0; j < 16; j++) {
		gfx_v9_0_select_se_sh(adev, 0x01, 0x0, j);
		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
		gfx_v9_0_select_se_sh(adev, 0x02, 0x0, j);
		for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
			RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
gfx_v9_0_select_se_sh(adev, 0x03, 0x0, j); 3761 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 3762 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 3763 gfx_v9_0_select_se_sh(adev, 0x04, 0x0, j); 3764 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 3765 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i])); 3766 } 3767 WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000); 3768 mutex_unlock(&adev->grbm_idx_mutex); 3769 3770 fail: 3771 amdgpu_ib_free(adev, &ib, NULL); 3772 dma_fence_put(f); 3773 3774 return r; 3775 } 3776 3777 static int gfx_v9_0_early_init(void *handle) 3778 { 3779 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3780 3781 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; 3782 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 3783 gfx_v9_0_set_ring_funcs(adev); 3784 gfx_v9_0_set_irq_funcs(adev); 3785 gfx_v9_0_set_gds_init(adev); 3786 gfx_v9_0_set_rlc_funcs(adev); 3787 3788 return 0; 3789 } 3790 3791 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, 3792 struct amdgpu_iv_entry *entry); 3793 3794 static int gfx_v9_0_ecc_late_init(void *handle) 3795 { 3796 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3797 struct ras_common_if **ras_if = &adev->gfx.ras_if; 3798 struct ras_ih_if ih_info = { 3799 .cb = gfx_v9_0_process_ras_data_cb, 3800 }; 3801 struct ras_fs_if fs_info = { 3802 .sysfs_name = "gfx_err_count", 3803 .debugfs_name = "gfx_err_inject", 3804 }; 3805 struct ras_common_if ras_block = { 3806 .block = AMDGPU_RAS_BLOCK__GFX, 3807 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, 3808 .sub_block_index = 0, 3809 .name = "gfx", 3810 }; 3811 int r; 3812 3813 if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { 3814 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); 3815 return 0; 3816 } 3817 3818 /* requires IBs so do in late init after IB pool is initialized */ 3819 r = gfx_v9_0_do_edc_gpr_workarounds(adev); 3820 if (r) 3821 return r; 3822 3823 /* handle resume path. */ 3824 if (*ras_if) { 3825 /* resend ras TA enable cmd during resume. 3826 * prepare to handle failure. 3827 */ 3828 ih_info.head = **ras_if; 3829 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3830 if (r) { 3831 if (r == -EAGAIN) { 3832 /* request a gpu reset. will run again. */ 3833 amdgpu_ras_request_reset_on_boot(adev, 3834 AMDGPU_RAS_BLOCK__GFX); 3835 return 0; 3836 } 3837 /* fail to enable ras, cleanup all. */ 3838 goto irq; 3839 } 3840 /* enable successfully. continue. 
*/ 3841 goto resume; 3842 } 3843 3844 *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL); 3845 if (!*ras_if) 3846 return -ENOMEM; 3847 3848 **ras_if = ras_block; 3849 3850 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); 3851 if (r) { 3852 if (r == -EAGAIN) { 3853 amdgpu_ras_request_reset_on_boot(adev, 3854 AMDGPU_RAS_BLOCK__GFX); 3855 r = 0; 3856 } 3857 goto feature; 3858 } 3859 3860 ih_info.head = **ras_if; 3861 fs_info.head = **ras_if; 3862 3863 r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); 3864 if (r) 3865 goto interrupt; 3866 3867 r = amdgpu_ras_debugfs_create(adev, &fs_info); 3868 if (r) 3869 goto debugfs; 3870 3871 r = amdgpu_ras_sysfs_create(adev, &fs_info); 3872 if (r) 3873 goto sysfs; 3874 resume: 3875 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 3876 if (r) 3877 goto irq; 3878 3879 return 0; 3880 irq: 3881 amdgpu_ras_sysfs_remove(adev, *ras_if); 3882 sysfs: 3883 amdgpu_ras_debugfs_remove(adev, *ras_if); 3884 debugfs: 3885 amdgpu_ras_interrupt_remove_handler(adev, &ih_info); 3886 interrupt: 3887 amdgpu_ras_feature_enable(adev, *ras_if, 0); 3888 feature: 3889 kfree(*ras_if); 3890 *ras_if = NULL; 3891 return r; 3892 } 3893 3894 static int gfx_v9_0_late_init(void *handle) 3895 { 3896 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 3897 int r; 3898 3899 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 3900 if (r) 3901 return r; 3902 3903 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 3904 if (r) 3905 return r; 3906 3907 r = gfx_v9_0_ecc_late_init(handle); 3908 if (r) 3909 return r; 3910 3911 return 0; 3912 } 3913 3914 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) 3915 { 3916 uint32_t rlc_setting; 3917 3918 /* if RLC is not enabled, do nothing */ 3919 rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); 3920 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 3921 return false; 3922 3923 return true; 3924 } 3925 3926 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) 3927 { 3928 uint32_t data; 3929 unsigned i; 3930 3931 data = RLC_SAFE_MODE__CMD_MASK; 3932 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 3933 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3934 3935 /* wait for RLC_SAFE_MODE */ 3936 for (i = 0; i < adev->usec_timeout; i++) { 3937 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 3938 break; 3939 udelay(1); 3940 } 3941 } 3942 3943 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) 3944 { 3945 uint32_t data; 3946 3947 data = RLC_SAFE_MODE__CMD_MASK; 3948 WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); 3949 } 3950 3951 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, 3952 bool enable) 3953 { 3954 amdgpu_gfx_rlc_enter_safe_mode(adev); 3955 3956 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 3957 gfx_v9_0_enable_gfx_cg_power_gating(adev, true); 3958 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 3959 gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); 3960 } else { 3961 gfx_v9_0_enable_gfx_cg_power_gating(adev, false); 3962 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); 3963 } 3964 3965 amdgpu_gfx_rlc_exit_safe_mode(adev); 3966 } 3967 3968 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, 3969 bool enable) 3970 { 3971 /* TODO: double check if we need to perform under safe mode */ 3972 /* gfx_v9_0_enter_rlc_safe_mode(adev); */ 3973 3974 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 3975 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); 3976 else 3977 
gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); 3978 3979 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 3980 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); 3981 else 3982 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); 3983 3984 /* gfx_v9_0_exit_rlc_safe_mode(adev); */ 3985 } 3986 3987 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 3988 bool enable) 3989 { 3990 uint32_t data, def; 3991 3992 amdgpu_gfx_rlc_enter_safe_mode(adev); 3993 3994 /* It is disabled by HW by default */ 3995 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 3996 /* 1 - RLC_CGTT_MGCG_OVERRIDE */ 3997 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 3998 3999 if (adev->asic_type != CHIP_VEGA12) 4000 data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4001 4002 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4003 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4004 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4005 4006 /* only for Vega10 & Raven1 */ 4007 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK; 4008 4009 if (def != data) 4010 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4011 4012 /* MGLS is a global flag to control all MGLS in GFX */ 4013 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 4014 /* 2 - RLC memory Light sleep */ 4015 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 4016 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4017 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4018 if (def != data) 4019 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4020 } 4021 /* 3 - CP memory Light sleep */ 4022 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 4023 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4024 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4025 if (def != data) 4026 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4027 } 4028 } 4029 } else { 4030 /* 1 - MGCG_OVERRIDE */ 4031 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4032 4033 if (adev->asic_type != CHIP_VEGA12) 4034 data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; 4035 4036 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | 4037 RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | 4038 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK | 4039 RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK); 4040 4041 if (def != data) 4042 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4043 4044 /* 2 - disable MGLS in RLC */ 4045 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4046 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 4047 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 4048 WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data); 4049 } 4050 4051 /* 3 - disable MGLS in CP */ 4052 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4053 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 4054 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 4055 WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); 4056 } 4057 } 4058 4059 amdgpu_gfx_rlc_exit_safe_mode(adev); 4060 } 4061 4062 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, 4063 bool enable) 4064 { 4065 uint32_t data, def; 4066 4067 amdgpu_gfx_rlc_enter_safe_mode(adev); 4068 4069 /* Enable 3D CGCG/CGLS */ 4070 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { 4071 /* write cmd to clear cgcg/cgls ov */ 4072 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4073 /* unset CGCG override */ 4074 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK; 4075 /* update CGCG and CGLS override 
bits */ 4076 if (def != data) 4077 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4078 4079 /* enable 3Dcgcg FSM(0x0000363f) */ 4080 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4081 4082 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4083 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK; 4084 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) 4085 data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4086 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK; 4087 if (def != data) 4088 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4089 4090 /* set IDLE_POLL_COUNT(0x00900100) */ 4091 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4092 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4093 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4094 if (def != data) 4095 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4096 } else { 4097 /* Disable CGCG/CGLS */ 4098 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4099 /* disable cgcg, cgls should be disabled */ 4100 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK | 4101 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK); 4102 /* disable cgcg and cgls in FSM */ 4103 if (def != data) 4104 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); 4105 } 4106 4107 amdgpu_gfx_rlc_exit_safe_mode(adev); 4108 } 4109 4110 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 4111 bool enable) 4112 { 4113 uint32_t def, data; 4114 4115 amdgpu_gfx_rlc_enter_safe_mode(adev); 4116 4117 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 4118 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4119 /* unset CGCG override */ 4120 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK; 4121 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4122 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4123 else 4124 data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK; 4125 /* update CGCG and CGLS override bits */ 4126 if (def != data) 4127 WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data); 4128 4129 /* enable cgcg FSM(0x0000363F) */ 4130 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4131 4132 data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | 4133 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 4134 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) 4135 data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | 4136 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 4137 if (def != data) 4138 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4139 4140 /* set IDLE_POLL_COUNT(0x00900100) */ 4141 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL); 4142 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | 4143 (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 4144 if (def != data) 4145 WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data); 4146 } else { 4147 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4148 /* reset CGCG/CGLS bits */ 4149 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4150 /* disable cgcg and cgls in FSM */ 4151 if (def != data) 4152 WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); 4153 } 4154 4155 amdgpu_gfx_rlc_exit_safe_mode(adev); 4156 } 4157 4158 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, 4159 bool enable) 4160 { 4161 if (enable) { 4162 /* CGCG/CGLS should be enabled after MGCG/MGLS 4163 * === MGCG + MGLS === 4164 */ 4165 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4166 /* === CGCG /CGLS for GFX 3D Only === */ 4167 
gfx_v9_0_update_3d_clock_gating(adev, enable); 4168 /* === CGCG + CGLS === */ 4169 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4170 } else { 4171 /* CGCG/CGLS should be disabled before MGCG/MGLS 4172 * === CGCG + CGLS === 4173 */ 4174 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable); 4175 /* === CGCG /CGLS for GFX 3D Only === */ 4176 gfx_v9_0_update_3d_clock_gating(adev, enable); 4177 /* === MGCG + MGLS === */ 4178 gfx_v9_0_update_medium_grain_clock_gating(adev, enable); 4179 } 4180 return 0; 4181 } 4182 4183 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { 4184 .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, 4185 .set_safe_mode = gfx_v9_0_set_safe_mode, 4186 .unset_safe_mode = gfx_v9_0_unset_safe_mode, 4187 .init = gfx_v9_0_rlc_init, 4188 .get_csb_size = gfx_v9_0_get_csb_size, 4189 .get_csb_buffer = gfx_v9_0_get_csb_buffer, 4190 .get_cp_table_num = gfx_v9_0_cp_jump_table_num, 4191 .resume = gfx_v9_0_rlc_resume, 4192 .stop = gfx_v9_0_rlc_stop, 4193 .reset = gfx_v9_0_rlc_reset, 4194 .start = gfx_v9_0_rlc_start 4195 }; 4196 4197 static int gfx_v9_0_set_powergating_state(void *handle, 4198 enum amd_powergating_state state) 4199 { 4200 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4201 bool enable = (state == AMD_PG_STATE_GATE) ? true : false; 4202 4203 switch (adev->asic_type) { 4204 case CHIP_RAVEN: 4205 if (!enable) { 4206 amdgpu_gfx_off_ctrl(adev, false); 4207 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4208 } 4209 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 4210 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); 4211 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); 4212 } else { 4213 gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); 4214 gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); 4215 } 4216 4217 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 4218 gfx_v9_0_enable_cp_power_gating(adev, true); 4219 else 4220 gfx_v9_0_enable_cp_power_gating(adev, false); 4221 4222 /* update gfx cgpg state */ 4223 gfx_v9_0_update_gfx_cg_power_gating(adev, enable); 4224 4225 /* update mgcg state */ 4226 gfx_v9_0_update_gfx_mg_power_gating(adev, enable); 4227 4228 if (enable) 4229 amdgpu_gfx_off_ctrl(adev, true); 4230 break; 4231 case CHIP_VEGA12: 4232 if (!enable) { 4233 amdgpu_gfx_off_ctrl(adev, false); 4234 cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); 4235 } else { 4236 amdgpu_gfx_off_ctrl(adev, true); 4237 } 4238 break; 4239 default: 4240 break; 4241 } 4242 4243 return 0; 4244 } 4245 4246 static int gfx_v9_0_set_clockgating_state(void *handle, 4247 enum amd_clockgating_state state) 4248 { 4249 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4250 4251 if (amdgpu_sriov_vf(adev)) 4252 return 0; 4253 4254 switch (adev->asic_type) { 4255 case CHIP_VEGA10: 4256 case CHIP_VEGA12: 4257 case CHIP_VEGA20: 4258 case CHIP_RAVEN: 4259 gfx_v9_0_update_gfx_clock_gating(adev, 4260 state == AMD_CG_STATE_GATE ? 
true : false); 4261 break; 4262 default: 4263 break; 4264 } 4265 return 0; 4266 } 4267 4268 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) 4269 { 4270 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4271 int data; 4272 4273 if (amdgpu_sriov_vf(adev)) 4274 *flags = 0; 4275 4276 /* AMD_CG_SUPPORT_GFX_MGCG */ 4277 data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); 4278 if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK)) 4279 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 4280 4281 /* AMD_CG_SUPPORT_GFX_CGCG */ 4282 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); 4283 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 4284 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 4285 4286 /* AMD_CG_SUPPORT_GFX_CGLS */ 4287 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 4288 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 4289 4290 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 4291 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL); 4292 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 4293 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 4294 4295 /* AMD_CG_SUPPORT_GFX_CP_LS */ 4296 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL); 4297 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 4298 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 4299 4300 /* AMD_CG_SUPPORT_GFX_3D_CGCG */ 4301 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); 4302 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) 4303 *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; 4304 4305 /* AMD_CG_SUPPORT_GFX_3D_CGLS */ 4306 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) 4307 *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; 4308 } 4309 4310 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 4311 { 4312 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr*/ 4313 } 4314 4315 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 4316 { 4317 struct amdgpu_device *adev = ring->adev; 4318 u64 wptr; 4319 4320 /* XXX check if swapping is necessary on BE */ 4321 if (ring->use_doorbell) { 4322 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]); 4323 } else { 4324 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR); 4325 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32; 4326 } 4327 4328 return wptr; 4329 } 4330 4331 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 4332 { 4333 struct amdgpu_device *adev = ring->adev; 4334 4335 if (ring->use_doorbell) { 4336 /* XXX check if swapping is necessary on BE */ 4337 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4338 WDOORBELL64(ring->doorbell_index, ring->wptr); 4339 } else { 4340 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4341 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr)); 4342 } 4343 } 4344 4345 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 4346 { 4347 struct amdgpu_device *adev = ring->adev; 4348 u32 ref_and_mask, reg_mem_engine; 4349 const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; 4350 4351 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 4352 switch (ring->me) { 4353 case 1: 4354 ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe; 4355 break; 4356 case 2: 4357 ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe; 4358 break; 4359 default: 4360 return; 4361 } 4362 reg_mem_engine = 0; 4363 } else { 4364 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; 4365 reg_mem_engine = 1; /* pfp */ 4366 } 4367 4368 gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1, 4369 adev->nbio_funcs->get_hdp_flush_req_offset(adev), 4370 
adev->nbio_funcs->get_hdp_flush_done_offset(adev), 4371 ref_and_mask, ref_and_mask, 0x20); 4372 } 4373 4374 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 4375 struct amdgpu_job *job, 4376 struct amdgpu_ib *ib, 4377 uint32_t flags) 4378 { 4379 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4380 u32 header, control = 0; 4381 4382 if (ib->flags & AMDGPU_IB_FLAG_CE) 4383 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 4384 else 4385 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 4386 4387 control |= ib->length_dw | (vmid << 24); 4388 4389 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 4390 control |= INDIRECT_BUFFER_PRE_ENB(1); 4391 4392 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 4393 gfx_v9_0_ring_emit_de_meta(ring); 4394 } 4395 4396 amdgpu_ring_write(ring, header); 4397 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4398 amdgpu_ring_write(ring, 4399 #ifdef __BIG_ENDIAN 4400 (2 << 0) | 4401 #endif 4402 lower_32_bits(ib->gpu_addr)); 4403 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4404 amdgpu_ring_write(ring, control); 4405 } 4406 4407 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 4408 struct amdgpu_job *job, 4409 struct amdgpu_ib *ib, 4410 uint32_t flags) 4411 { 4412 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 4413 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 4414 4415 /* Currently, there is a high possibility to get wave ID mismatch 4416 * between ME and GDS, leading to a hw deadlock, because ME generates 4417 * different wave IDs than the GDS expects. This situation happens 4418 * randomly when at least 5 compute pipes use GDS ordered append. 4419 * The wave IDs generated by ME are also wrong after suspend/resume. 4420 * Those are probably bugs somewhere else in the kernel driver. 4421 * 4422 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 4423 * GDS to 0 for this ring (me/pipe). 4424 */ 4425 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 4426 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 4427 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID); 4428 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 4429 } 4430 4431 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 4432 BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ 4433 amdgpu_ring_write(ring, 4434 #ifdef __BIG_ENDIAN 4435 (2 << 0) | 4436 #endif 4437 lower_32_bits(ib->gpu_addr)); 4438 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); 4439 amdgpu_ring_write(ring, control); 4440 } 4441 4442 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 4443 u64 seq, unsigned flags) 4444 { 4445 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 4446 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 4447 bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; 4448 4449 /* RELEASE_MEM - flush caches, send int */ 4450 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); 4451 amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN | 4452 EOP_TC_NC_ACTION_EN) : 4453 (EOP_TCL1_ACTION_EN | 4454 EOP_TC_ACTION_EN | 4455 EOP_TC_WB_ACTION_EN | 4456 EOP_TC_MD_ACTION_EN)) | 4457 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 4458 EVENT_INDEX(5))); 4459 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 4460 4461 /* 4462 * the address should be Qword aligned if 64bit write, Dword 4463 * aligned if only send 32bit data low (discard data high) 4464 */ 4465 if (write64bit) 4466 BUG_ON(addr & 0x7); 4467 else 4468 BUG_ON(addr & 0x3); 4469 amdgpu_ring_write(ring, lower_32_bits(addr)); 4470 amdgpu_ring_write(ring, upper_32_bits(addr)); 4471 amdgpu_ring_write(ring, lower_32_bits(seq)); 4472 amdgpu_ring_write(ring, upper_32_bits(seq)); 4473 amdgpu_ring_write(ring, 0); 4474 } 4475 4476 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 4477 { 4478 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4479 uint32_t seq = ring->fence_drv.sync_seq; 4480 uint64_t addr = ring->fence_drv.gpu_addr; 4481 4482 gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0, 4483 lower_32_bits(addr), upper_32_bits(addr), 4484 seq, 0xffffffff, 4); 4485 } 4486 4487 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 4488 unsigned vmid, uint64_t pd_addr) 4489 { 4490 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 4491 4492 /* compute doesn't have PFP */ 4493 if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) { 4494 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 4495 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 4496 amdgpu_ring_write(ring, 0x0); 4497 } 4498 } 4499 4500 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 4501 { 4502 return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */ 4503 } 4504 4505 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 4506 { 4507 u64 wptr; 4508 4509 /* XXX check if swapping is necessary on BE */ 4510 if (ring->use_doorbell) 4511 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]); 4512 else 4513 BUG(); 4514 return wptr; 4515 } 4516 4517 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 4518 bool acquire) 4519 { 4520 struct amdgpu_device *adev = ring->adev; 4521 int pipe_num, tmp, reg; 4522 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 4523 4524 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 4525 4526 /* first me only has 2 entries, GFX and HP3D */ 4527 if (ring->me > 0) 4528 pipe_num -= 2; 4529 4530 reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num; 4531 tmp = RREG32(reg); 4532 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 4533 WREG32(reg, tmp); 4534 } 4535 4536 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev, 4537 struct amdgpu_ring *ring, 4538 bool acquire) 4539 { 4540 int i, pipe; 4541 bool reserve; 4542 struct amdgpu_ring *iring; 4543 4544 mutex_lock(&adev->gfx.pipe_reserve_mutex); 4545 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); 4546 if (acquire) 4547 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4548 else 4549 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4550 4551 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 4552 /* Clear all reservations - everyone reacquires all resources */ 4553 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 4554 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 4555 true); 4556 4557 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 4558 gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 4559 true); 4560 } else { 4561 /* Lower all pipes without a current reservation */ 4562 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 4563 iring = &adev->gfx.gfx_ring[i]; 4564 pipe = amdgpu_gfx_queue_to_bit(adev, 4565 iring->me, 4566 iring->pipe, 4567 0); 4568 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4569 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4570 } 4571 4572 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 4573 iring = &adev->gfx.compute_ring[i]; 4574 pipe = amdgpu_gfx_queue_to_bit(adev, 4575 iring->me, 4576 iring->pipe, 4577 0); 4578 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 4579 gfx_v9_0_ring_set_pipe_percent(iring, reserve); 4580 } 4581 } 4582 4583 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 4584 } 4585 4586 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev, 4587 struct amdgpu_ring *ring, 4588 bool acquire) 4589 { 4590 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 4591 uint32_t queue_priority = acquire ? 
0xf : 0x0; 4592 4593 mutex_lock(&adev->srbm_mutex); 4594 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4595 4596 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority); 4597 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority); 4598 4599 soc15_grbm_select(adev, 0, 0, 0, 0); 4600 mutex_unlock(&adev->srbm_mutex); 4601 } 4602 4603 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring, 4604 enum drm_sched_priority priority) 4605 { 4606 struct amdgpu_device *adev = ring->adev; 4607 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 4608 4609 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 4610 return; 4611 4612 gfx_v9_0_hqd_set_priority(adev, ring, acquire); 4613 gfx_v9_0_pipe_reserve_resources(adev, ring, acquire); 4614 } 4615 4616 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 4617 { 4618 struct amdgpu_device *adev = ring->adev; 4619 4620 /* XXX check if swapping is necessary on BE */ 4621 if (ring->use_doorbell) { 4622 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr); 4623 WDOORBELL64(ring->doorbell_index, ring->wptr); 4624 } else{ 4625 BUG(); /* only DOORBELL method supported on gfx9 now */ 4626 } 4627 } 4628 4629 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 4630 u64 seq, unsigned int flags) 4631 { 4632 struct amdgpu_device *adev = ring->adev; 4633 4634 /* we only allocate 32bit for each seq wb address */ 4635 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 4636 4637 /* write fence seq to the "addr" */ 4638 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4639 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4640 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 4641 amdgpu_ring_write(ring, lower_32_bits(addr)); 4642 amdgpu_ring_write(ring, upper_32_bits(addr)); 4643 amdgpu_ring_write(ring, lower_32_bits(seq)); 4644 4645 if (flags & AMDGPU_FENCE_FLAG_INT) { 4646 /* set register to trigger INT */ 4647 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4648 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 4649 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 4650 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS)); 4651 amdgpu_ring_write(ring, 0); 4652 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 4653 } 4654 } 4655 4656 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring) 4657 { 4658 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 4659 amdgpu_ring_write(ring, 0); 4660 } 4661 4662 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 4663 { 4664 struct v9_ce_ib_state ce_payload = {0}; 4665 uint64_t csa_addr; 4666 int cnt; 4667 4668 cnt = (sizeof(ce_payload) >> 2) + 4 - 2; 4669 csa_addr = amdgpu_csa_vaddr(ring->adev); 4670 4671 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); 4672 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 4673 WRITE_DATA_DST_SEL(8) | 4674 WR_CONFIRM) | 4675 WRITE_DATA_CACHE_POLICY(0)); 4676 amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4677 amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); 4678 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); 4679 } 4680 4681 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) 4682 { 4683 struct v9_de_ib_state de_payload = {0}; 4684 uint64_t csa_addr, gds_addr; 4685 int cnt; 4686 4687 csa_addr = amdgpu_csa_vaddr(ring->adev); 4688 gds_addr = csa_addr + 4096; 4689 de_payload.gds_backup_addrlo = 
lower_32_bits(gds_addr);
	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);

	cnt = (sizeof(de_payload) >> 2) + 4 - 2;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
}

static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
	amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */
}

static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v9_0_ring_emit_ce_meta(ring);

	gfx_v9_0_ring_emit_tmz(ring, true);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void
gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 4785 uint32_t val) 4786 { 4787 uint32_t cmd = 0; 4788 4789 switch (ring->funcs->type) { 4790 case AMDGPU_RING_TYPE_GFX: 4791 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 4792 break; 4793 case AMDGPU_RING_TYPE_KIQ: 4794 cmd = (1 << 16); /* no inc addr */ 4795 break; 4796 default: 4797 cmd = WR_CONFIRM; 4798 break; 4799 } 4800 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 4801 amdgpu_ring_write(ring, cmd); 4802 amdgpu_ring_write(ring, reg); 4803 amdgpu_ring_write(ring, 0); 4804 amdgpu_ring_write(ring, val); 4805 } 4806 4807 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, 4808 uint32_t val, uint32_t mask) 4809 { 4810 gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); 4811 } 4812 4813 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, 4814 uint32_t reg0, uint32_t reg1, 4815 uint32_t ref, uint32_t mask) 4816 { 4817 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 4818 struct amdgpu_device *adev = ring->adev; 4819 bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? 4820 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; 4821 4822 if (fw_version_ok) 4823 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, 4824 ref, mask, 0x20); 4825 else 4826 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, 4827 ref, mask); 4828 } 4829 4830 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) 4831 { 4832 struct amdgpu_device *adev = ring->adev; 4833 uint32_t value = 0; 4834 4835 value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); 4836 value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); 4837 value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); 4838 value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); 4839 WREG32(mmSQ_CMD, value); 4840 } 4841 4842 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 4843 enum amdgpu_interrupt_state state) 4844 { 4845 switch (state) { 4846 case AMDGPU_IRQ_STATE_DISABLE: 4847 case AMDGPU_IRQ_STATE_ENABLE: 4848 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4849 TIME_STAMP_INT_ENABLE, 4850 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4851 break; 4852 default: 4853 break; 4854 } 4855 } 4856 4857 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 4858 int me, int pipe, 4859 enum amdgpu_interrupt_state state) 4860 { 4861 u32 mec_int_cntl, mec_int_cntl_reg; 4862 4863 /* 4864 * amdgpu controls only the first MEC. That's why this function only 4865 * handles the setting of interrupts for this specific MEC. All other 4866 * pipes' interrupts are set by amdkfd. 
4867 */ 4868 4869 if (me == 1) { 4870 switch (pipe) { 4871 case 0: 4872 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); 4873 break; 4874 case 1: 4875 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); 4876 break; 4877 case 2: 4878 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); 4879 break; 4880 case 3: 4881 mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); 4882 break; 4883 default: 4884 DRM_DEBUG("invalid pipe %d\n", pipe); 4885 return; 4886 } 4887 } else { 4888 DRM_DEBUG("invalid me %d\n", me); 4889 return; 4890 } 4891 4892 switch (state) { 4893 case AMDGPU_IRQ_STATE_DISABLE: 4894 mec_int_cntl = RREG32(mec_int_cntl_reg); 4895 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4896 TIME_STAMP_INT_ENABLE, 0); 4897 WREG32(mec_int_cntl_reg, mec_int_cntl); 4898 break; 4899 case AMDGPU_IRQ_STATE_ENABLE: 4900 mec_int_cntl = RREG32(mec_int_cntl_reg); 4901 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 4902 TIME_STAMP_INT_ENABLE, 1); 4903 WREG32(mec_int_cntl_reg, mec_int_cntl); 4904 break; 4905 default: 4906 break; 4907 } 4908 } 4909 4910 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 4911 struct amdgpu_irq_src *source, 4912 unsigned type, 4913 enum amdgpu_interrupt_state state) 4914 { 4915 switch (state) { 4916 case AMDGPU_IRQ_STATE_DISABLE: 4917 case AMDGPU_IRQ_STATE_ENABLE: 4918 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4919 PRIV_REG_INT_ENABLE, 4920 state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); 4921 break; 4922 default: 4923 break; 4924 } 4925 4926 return 0; 4927 } 4928 4929 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 4930 struct amdgpu_irq_src *source, 4931 unsigned type, 4932 enum amdgpu_interrupt_state state) 4933 { 4934 switch (state) { 4935 case AMDGPU_IRQ_STATE_DISABLE: 4936 case AMDGPU_IRQ_STATE_ENABLE: 4937 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4938 PRIV_INSTR_INT_ENABLE, 4939 state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); 4940 default: 4941 break; 4942 } 4943 4944 return 0; 4945 } 4946 4947 #define ENABLE_ECC_ON_ME_PIPE(me, pipe) \ 4948 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4949 CP_ECC_ERROR_INT_ENABLE, 1) 4950 4951 #define DISABLE_ECC_ON_ME_PIPE(me, pipe) \ 4952 WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\ 4953 CP_ECC_ERROR_INT_ENABLE, 0) 4954 4955 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev, 4956 struct amdgpu_irq_src *source, 4957 unsigned type, 4958 enum amdgpu_interrupt_state state) 4959 { 4960 switch (state) { 4961 case AMDGPU_IRQ_STATE_DISABLE: 4962 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4963 CP_ECC_ERROR_INT_ENABLE, 0); 4964 DISABLE_ECC_ON_ME_PIPE(1, 0); 4965 DISABLE_ECC_ON_ME_PIPE(1, 1); 4966 DISABLE_ECC_ON_ME_PIPE(1, 2); 4967 DISABLE_ECC_ON_ME_PIPE(1, 3); 4968 break; 4969 4970 case AMDGPU_IRQ_STATE_ENABLE: 4971 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, 4972 CP_ECC_ERROR_INT_ENABLE, 1); 4973 ENABLE_ECC_ON_ME_PIPE(1, 0); 4974 ENABLE_ECC_ON_ME_PIPE(1, 1); 4975 ENABLE_ECC_ON_ME_PIPE(1, 2); 4976 ENABLE_ECC_ON_ME_PIPE(1, 3); 4977 break; 4978 default: 4979 break; 4980 } 4981 4982 return 0; 4983 } 4984 4985 4986 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev, 4987 struct amdgpu_irq_src *src, 4988 unsigned type, 4989 enum amdgpu_interrupt_state state) 4990 { 4991 switch (type) { 4992 case AMDGPU_CP_IRQ_GFX_EOP: 4993 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state); 4994 break; 4995 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 4996 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 4997 break; 4998 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 4999 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 5000 break; 5001 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 5002 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 5003 break; 5004 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 5005 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 5006 break; 5007 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 5008 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 5009 break; 5010 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 5011 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 5012 break; 5013 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 5014 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 5015 break; 5016 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 5017 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 5018 break; 5019 default: 5020 break; 5021 } 5022 return 0; 5023 } 5024 5025 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, 5026 struct amdgpu_irq_src *source, 5027 struct amdgpu_iv_entry *entry) 5028 { 5029 int i; 5030 u8 me_id, pipe_id, queue_id; 5031 struct amdgpu_ring *ring; 5032 5033 DRM_DEBUG("IH: CP EOP\n"); 5034 me_id = (entry->ring_id & 0x0c) >> 2; 5035 pipe_id = (entry->ring_id & 0x03) >> 0; 5036 queue_id = (entry->ring_id & 0x70) >> 4; 5037 5038 switch (me_id) { 5039 case 0: 5040 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 5041 break; 5042 case 1: 5043 case 2: 5044 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5045 ring = &adev->gfx.compute_ring[i]; 5046 /* Per-queue interrupt is supported for MEC starting from VI. 5047 * The interrupt can only be enabled/disabled per pipe instead of per queue. 
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) &&
			    (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static void gfx_v9_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v9_0_fault(adev, entry);
	return 0;
}

static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
					struct amdgpu_iv_entry *entry)
{
	/* TODO: an uncorrectable error (UE) will trigger an interrupt. */
	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	amdgpu_ras_reset_gpu(adev, 0);
	return AMDGPU_RAS_UE;
}

static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	struct ras_common_if *ras_if = adev->gfx.ras_if;
	struct ras_dispatch_if ih_data = {
		.entry = entry,
	};

	if (!ras_if)
		return 0;

	ih_data.head = *ras_if;

	DRM_ERROR("CP ECC ERROR IRQ\n");
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
	.name = "gfx_v9_0",
	.early_init = gfx_v9_0_early_init,
	.late_init = gfx_v9_0_late_init,
	.sw_init = gfx_v9_0_sw_init,
	.sw_fini = gfx_v9_0_sw_fini,
	.hw_init = gfx_v9_0_hw_init,
	.hw_fini = gfx_v9_0_hw_fini,
	.suspend = gfx_v9_0_suspend,
	.resume = gfx_v9_0_resume,
	.is_idle = gfx_v9_0_is_idle,
	.wait_for_idle = gfx_v9_0_wait_for_idle,
	.soft_reset = gfx_v9_0_soft_reset,
	.set_clockgating_state = gfx_v9_0_set_clockgating_state,
	.set_powergating_state = gfx_v9_0_set_powergating_state,
	.get_clockgating_state = gfx_v9_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v9_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v9_0_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER, the
		      * first COND_EXEC jump to the place just
		      * prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_tmz = gfx_v9_0_ring_emit_tmz,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 +  /* gfx_v9_0_ring_emit_hdp_flush */
		5 +  /* hdp invalidate */
		7 +  /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +  /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.set_priority = gfx_v9_0_ring_set_priority_compute,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.vmhub = AMDGPU_GFXHUB,
	.get_rptr = gfx_v9_0_ring_get_rptr_compute,
	.get_wptr = gfx_v9_0_ring_get_wptr_compute,
	.set_wptr = gfx_v9_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v9_0_ring_emit_gds_switch */
		7 +  /* gfx_v9_0_ring_emit_hdp_flush */
		5 +  /* hdp invalidate */
		7 +  /* gfx_v9_0_ring_emit_pipeline_sync */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 +
		     /* gfx_v9_0_ring_emit_vm_flush */
		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
	.test_ring = gfx_v9_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v9_0_ring_emit_rreg,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
};

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
	.set = gfx_v9_0_set_eop_interrupt_state,
	.process = gfx_v9_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
	.set = gfx_v9_0_set_priv_reg_fault_state,
	.process = gfx_v9_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
	.set = gfx_v9_0_set_priv_inst_fault_state,
	.process = gfx_v9_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v9_0_set_cp_ecc_error_state,
	.process = gfx_v9_0_cp_ecc_error_irq,
};


static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
}

static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
		break;
	default:
		break;
	}
}

static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
		adev->gds.gds_size = 0x10000;
		break;
	case CHIP_RAVEN:
		adev->gds.gds_size = 0x1000;
		break;
	default:
		adev->gds.gds_size = 0x10000;
		break;
	}

	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	case CHIP_VEGA12:
		adev->gds.gds_compute_max_wave_id = 0x27f;
		break;
	case CHIP_RAVEN:
		if (adev->rev_id >= 0x8)
			adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
		else
			adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
		break;
	default:
		/* this really depends on the chip */
		adev->gds.gds_compute_max_wave_id = 0x7ff;
		break;
	}

	adev->gds.gws_size = 64;
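	/* as with GWS (global wave sync) above, the ordered-append (OA)
	 * resource is a fixed size on the gfx9 parts handled here
	 */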
	adev->gds.oa_size = 16;
}

static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
	data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return (~data) & mask;
}

static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
				struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	unsigned disable_masks[4 * 2];

	if (!adev || !cu_info)
		return -EINVAL;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v9_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < adev->gfx.config.max_cu_per_sh)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;

	return 0;
}

const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 9,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v9_0_ip_funcs,
};