/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	2048

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

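/* CP (PFP/ME/CE/MEC) and RLC microcode images requested per supported ASIC */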
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
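/*
 * Per-VMID GDS register offsets: base/size of the GDS memory partition
 * plus the GWS and OA allocations, one entry for each of the 16 VMIDs.
 */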
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
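/*
 * The golden register tables below are triplets of {offset, AND mask, OR
 * value}, applied with amdgpu_device_program_register_sequence(): the
 * masked bits are cleared and the OR value is written in (or the value is
 * written verbatim when the mask is 0xffffffff).
 */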
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);
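/*
 * Apply the per-ASIC "golden" register settings: clockgating init values,
 * ASIC-specific tuning and the common GRBM/SPI/GB_ADDR_CONFIG setup.
 */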
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;
	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 8;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}
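/*
 * Ring sanity test: seed a scratch register with 0xCAFEDEAD, ask the CP to
 * rewrite it to 0xDEADBEEF via a SET_UCONFIG_REG packet, then poll until
 * the new value lands or the timeout expires.
 */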
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
			  ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 16, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF) {
		DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("ib test on ring %d failed\n", ring->idx);
		r = -EINVAL;
	}

err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}
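/*
 * Fetch and validate all CP and RLC microcode for the current ASIC.
 * Polaris parts prefer the newer "_2" firmware images and fall back to
 * the original names when those files are not present.
 */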
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
			err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
		err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
			err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
		err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);

	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
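	/* constant engine (CE) firmware, again preferring the Polaris "_2" images */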
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
			err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
		err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
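	/* compute engine (MEC) firmware; the optional MEC2 image follows below */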
	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		if (err == -ENOENT) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
			err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
		}
	} else {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
		err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	}
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			if (err == -ENOENT) {
				snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
				err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
			}
		} else {
			snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
			err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		}
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
	info->fw = adev->gfx.pfp_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
	info->fw = adev->gfx.me_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
	info->fw = adev->gfx.ce_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
	info->fw = adev->gfx.rlc_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
	info->fw = adev->gfx.mec_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	/* we also need to account for the CP jump table (JT) */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
	if (amdgpu_sriov_vf(adev)) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
	}

	if (adev->gfx.mec2_fw) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
		info->fw = adev->gfx.mec2_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;
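	/*
	 * The RLC expects the CP jump tables packed back to back in the
	 * table BO: me 0..4 below select the CE, PFP, ME, MEC and (Carrizo
	 * only) MEC2 images.
	 */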
	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL);
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.clear_state_obj,
					      &adev->gfx.rlc.clear_state_gpu_addr,
					      (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC clear state bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size,
					      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
					      &adev->gfx.rlc.cp_table_obj,
					      &adev->gfx.rlc.cp_table_gpu_addr,
					      (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n",
				 r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}
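/*
 * Allocate the MEC HPD EOP buffer in GTT: GFX8_MEC_HPD_SIZE bytes for each
 * compute ring acquired from the queue bitmap.
 */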
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;

	r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_GTT,
				      &adev->gfx.mec.hpd_eop_obj,
				      &adev->gfx.mec.hpd_eop_gpu_addr,
				      (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
		return r;
	}

	memset(hpd, 0, mec_hpd_size);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
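/*
 * Register/value pairs written via SET_SH_REG before each EDC workaround
 * dispatch below: one dispatch exercises the VGPRs and two more the SGPRs.
 */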
static const u32 vgpr_init_compute_shader[] =
{
    0x7e000209, 0x7e020208,
    0x7e040207, 0x7e060206,
    0x7e080205, 0x7e0a0204,
    0x7e0c0203, 0x7e0e0202,
    0x7e100201, 0x7e120200,
    0x7e140209, 0x7e160208,
    0x7e180207, 0x7e1a0206,
    0x7e1c0205, 0x7e1e0204,
    0x7e200203, 0x7e220202,
    0x7e240201, 0x7e260200,
    0x7e280209, 0x7e2a0208,
    0x7e2c0207, 0x7e2e0206,
    0x7e300205, 0x7e320204,
    0x7e340203, 0x7e360202,
    0x7e380201, 0x7e3a0200,
    0x7e3c0209, 0x7e3e0208,
    0x7e400207, 0x7e420206,
    0x7e440205, 0x7e460204,
    0x7e480203, 0x7e4a0202,
    0x7e4c0201, 0x7e4e0200,
    0x7e500209, 0x7e520208,
    0x7e540207, 0x7e560206,
    0x7e580205, 0x7e5a0204,
    0x7e5c0203, 0x7e5e0202,
    0x7e600201, 0x7e620200,
    0x7e640209, 0x7e660208,
    0x7e680207, 0x7e6a0206,
    0x7e6c0205, 0x7e6e0204,
    0x7e700203, 0x7e720202,
    0x7e740201, 0x7e760200,
    0x7e780209, 0x7e7a0208,
    0x7e7c0207, 0x7e7e0206,
    0xbf8a0000, 0xbf810000,
};

static const u32 sgpr_init_compute_shader[] =
{
    0xbe8a0100, 0xbe8c0102,
    0xbe8e0104, 0xbe900106,
    0xbe920108, 0xbe940100,
    0xbe960102, 0xbe980104,
    0xbe9a0106, 0xbe9c0108,
    0xbe9e0100, 0xbea00102,
    0xbea20104, 0xbea40106,
    0xbea60108, 0xbea80100,
    0xbeaa0102, 0xbeac0104,
    0xbeae0106, 0xbeb00108,
    0xbeb20100, 0xbeb40102,
    0xbeb60104, 0xbeb80106,
    0xbeba0108, 0xbebc0100,
    0xbebe0102, 0xbec00104,
    0xbec20106, 0xbec40108,
    0xbec60100, 0xbec80102,
    0xbee60004, 0xbee70005,
    0xbeea0006, 0xbeeb0007,
    0xbee80008, 0xbee90009,
    0xbefc0000, 0xbf8a0000,
    0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
    mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
    mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
    mmCOMPUTE_NUM_THREAD_X, 256*4,
    mmCOMPUTE_NUM_THREAD_Y, 1,
    mmCOMPUTE_NUM_THREAD_Z, 1,
    mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
    mmCOMPUTE_PGM_RSRC2, 20,
    mmCOMPUTE_USER_DATA_0, 0xedcedc00,
    mmCOMPUTE_USER_DATA_1, 0xedcedc01,
    mmCOMPUTE_USER_DATA_2, 0xedcedc02,
    mmCOMPUTE_USER_DATA_3, 0xedcedc03,
    mmCOMPUTE_USER_DATA_4, 0xedcedc04,
    mmCOMPUTE_USER_DATA_5, 0xedcedc05,
    mmCOMPUTE_USER_DATA_6, 0xedcedc06,
    mmCOMPUTE_USER_DATA_7, 0xedcedc07,
    mmCOMPUTE_USER_DATA_8, 0xedcedc08,
    mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
    mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
    mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
    mmCOMPUTE_NUM_THREAD_X, 256*5,
    mmCOMPUTE_NUM_THREAD_Y, 1,
    mmCOMPUTE_NUM_THREAD_Z, 1,
    mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
    mmCOMPUTE_PGM_RSRC2, 20,
    mmCOMPUTE_USER_DATA_0, 0xedcedc00,
    mmCOMPUTE_USER_DATA_1, 0xedcedc01,
    mmCOMPUTE_USER_DATA_2, 0xedcedc02,
    mmCOMPUTE_USER_DATA_3, 0xedcedc03,
    mmCOMPUTE_USER_DATA_4, 0xedcedc04,
    mmCOMPUTE_USER_DATA_5, 0xedcedc05,
    mmCOMPUTE_USER_DATA_6, 0xedcedc06,
    mmCOMPUTE_USER_DATA_7, 0xedcedc07,
    mmCOMPUTE_USER_DATA_8, 0xedcedc08,
    mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
    mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
    mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
    mmCOMPUTE_NUM_THREAD_X, 256*5,
    mmCOMPUTE_NUM_THREAD_Y, 1,
    mmCOMPUTE_NUM_THREAD_Z, 1,
    mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
    mmCOMPUTE_PGM_RSRC2, 20,
    mmCOMPUTE_USER_DATA_0, 0xedcedc00,
    mmCOMPUTE_USER_DATA_1, 0xedcedc01,
    mmCOMPUTE_USER_DATA_2, 0xedcedc02,
    mmCOMPUTE_USER_DATA_3, 0xedcedc03,
    mmCOMPUTE_USER_DATA_4, 0xedcedc04,
    mmCOMPUTE_USER_DATA_5, 0xedcedc05,
    mmCOMPUTE_USER_DATA_6, 0xedcedc06,
    mmCOMPUTE_USER_DATA_7, 0xedcedc07,
    mmCOMPUTE_USER_DATA_8, 0xedcedc08,
    mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sec_ded_counter_registers[] =
{
    mmCPC_EDC_ATC_CNT,
    mmCPC_EDC_SCRATCH_CNT,
    mmCPC_EDC_UCODE_CNT,
    mmCPF_EDC_ATC_CNT,
    mmCPF_EDC_ROQ_CNT,
    mmCPF_EDC_TAG_CNT,
    mmCPG_EDC_ATC_CNT,
    mmCPG_EDC_DMA_CNT,
    mmCPG_EDC_TAG_CNT,
    mmDC_EDC_CSINVOC_CNT,
    mmDC_EDC_RESTORE_CNT,
    mmDC_EDC_STATE_CNT,
    mmGDS_EDC_CNT,
    mmGDS_EDC_GRBM_CNT,
    mmGDS_EDC_OA_DED,
    mmSPI_EDC_CNT,
    mmSQC_ATC_EDC_GATCL1_CNT,
    mmSQC_EDC_CNT,
    mmSQ_EDC_DED_CNT,
    mmSQ_EDC_INFO,
    mmSQ_EDC_SEC_CNT,
    mmTCC_EDC_CNT,
    mmTCP_ATC_EDC_GATCL1_CNT,
    mmTCP_EDC_CNT,
    mmTD_EDC_CNT
};
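/*
 * IB sizing for the workaround below: each register pair in the
 * *_init_regs tables costs three dwords as a SET_SH_REG packet (header,
 * register offset, value), plus 4 dwords to program COMPUTE_PGM_LO/HI,
 * 5 for DISPATCH_DIRECT and 2 for the EVENT_WRITE flush.  With
 * vgpr_init_regs holding 17 pairs that is 17 * 3 + 4 + 5 + 2 = 62
 * dwords, i.e. 248 bytes per dispatch section; the three sections are
 * summed and aligned to 256 bytes before the shader binaries are
 * appended.
 */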
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1655 return r; 1656 } 1657 1658 /* load the compute shaders */ 1659 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1660 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1661 1662 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1663 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1664 1665 /* init the ib length to 0 */ 1666 ib.length_dw = 0; 1667 1668 /* VGPR */ 1669 /* write the register state for the compute dispatch */ 1670 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1671 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1672 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1673 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1674 } 1675 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1676 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1677 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1678 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1679 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1680 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1681 1682 /* write dispatch packet */ 1683 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1684 ib.ptr[ib.length_dw++] = 8; /* x */ 1685 ib.ptr[ib.length_dw++] = 1; /* y */ 1686 ib.ptr[ib.length_dw++] = 1; /* z */ 1687 ib.ptr[ib.length_dw++] = 1688 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1689 1690 /* write CS partial flush packet */ 1691 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1692 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1693 1694 /* SGPR1 */ 1695 /* write the register state for the compute dispatch */ 1696 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1697 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1698 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1699 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1700 } 1701 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1702 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1703 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1704 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1705 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1706 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1707 1708 /* write dispatch packet */ 1709 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1710 ib.ptr[ib.length_dw++] = 8; /* x */ 1711 ib.ptr[ib.length_dw++] = 1; /* y */ 1712 ib.ptr[ib.length_dw++] = 1; /* z */ 1713 ib.ptr[ib.length_dw++] = 1714 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1715 1716 /* write CS partial flush packet */ 1717 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1718 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1719 1720 /* SGPR2 */ 1721 /* write the register state for the compute dispatch */ 1722 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1723 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1724 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1725 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1726 } 1727 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1728 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1729 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1730 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1731 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
    u32 gb_addr_config;
    u32 mc_shared_chmap, mc_arb_ramcfg;
    u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
    u32 tmp;
    int ret;

    switch (adev->asic_type) {
    case CHIP_TOPAZ:
        adev->gfx.config.max_shader_engines = 1;
        adev->gfx.config.max_tile_pipes = 2;
        adev->gfx.config.max_cu_per_sh = 6;
        adev->gfx.config.max_sh_per_se = 1;
        adev->gfx.config.max_backends_per_se = 2;
        adev->gfx.config.max_texture_channel_caches = 2;
        adev->gfx.config.max_gprs = 256;
        adev->gfx.config.max_gs_threads = 32;
        adev->gfx.config.max_hw_contexts = 8;

        adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
        adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
        adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
        adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
        gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
        break;
    case CHIP_FIJI:
        adev->gfx.config.max_shader_engines = 4;
        adev->gfx.config.max_tile_pipes = 16;
        adev->gfx.config.max_cu_per_sh = 16;
        adev->gfx.config.max_sh_per_se = 1;
        adev->gfx.config.max_backends_per_se = 4;
        adev->gfx.config.max_texture_channel_caches = 16;
        adev->gfx.config.max_gprs = 256;
        adev->gfx.config.max_gs_threads = 32;
        adev->gfx.config.max_hw_contexts = 8;

        adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
        adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
        adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
        adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
        gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
        break;
    case CHIP_POLARIS11:
    case CHIP_POLARIS12:
        ret = amdgpu_atombios_get_gfx_info(adev);
        if (ret)
            return ret;
        adev->gfx.config.max_gprs = 256;
        adev->gfx.config.max_gs_threads = 32;
        adev->gfx.config.max_hw_contexts = 8;

        adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
        adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
        adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
        adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
        gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
        break;
    case CHIP_POLARIS10:
    case CHIP_VEGAM:
        ret = amdgpu_atombios_get_gfx_info(adev);
        if (ret)
            return ret;
        adev->gfx.config.max_gprs = 256;
        adev->gfx.config.max_gs_threads = 32;
        adev->gfx.config.max_hw_contexts = 8;

        adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
        adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
        adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
        adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
        gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
        break;
    case CHIP_TONGA:
        adev->gfx.config.max_shader_engines = 4;
        adev->gfx.config.max_tile_pipes = 8;
        adev->gfx.config.max_cu_per_sh = 8;
        adev->gfx.config.max_sh_per_se = 1;
        adev->gfx.config.max_backends_per_se = 2;
        adev->gfx.config.max_texture_channel_caches = 8;
        adev->gfx.config.max_gprs = 256;
        adev->gfx.config.max_gs_threads = 32;
        adev->gfx.config.max_hw_contexts = 8;

        adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
        adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
        adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
        adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
        gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
        break;
    case CHIP_CARRIZO:
        adev->gfx.config.max_shader_engines = 1;
        adev->gfx.config.max_tile_pipes = 2;
        adev->gfx.config.max_sh_per_se = 1;
        adev->gfx.config.max_backends_per_se = 2;
        adev->gfx.config.max_cu_per_sh = 8;
        adev->gfx.config.max_texture_channel_caches = 2;
        adev->gfx.config.max_gprs = 256;
        adev->gfx.config.max_gs_threads = 32;
        adev->gfx.config.max_hw_contexts = 8;

        adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
        adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
        adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
        adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
        gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
        break;
    case CHIP_STONEY:
        adev->gfx.config.max_shader_engines = 1;
        adev->gfx.config.max_tile_pipes = 2;
        adev->gfx.config.max_sh_per_se = 1;
        adev->gfx.config.max_backends_per_se = 1;
        adev->gfx.config.max_cu_per_sh = 3;
        adev->gfx.config.max_texture_channel_caches = 2;
        adev->gfx.config.max_gprs = 256;
        adev->gfx.config.max_gs_threads = 16;
        adev->gfx.config.max_hw_contexts = 8;

        adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
        adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
        adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
        adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
        gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
        break;
    default:
        adev->gfx.config.max_shader_engines = 2;
        adev->gfx.config.max_tile_pipes = 4;
        adev->gfx.config.max_cu_per_sh = 2;
        adev->gfx.config.max_sh_per_se = 1;
        adev->gfx.config.max_backends_per_se = 2;
        adev->gfx.config.max_texture_channel_caches = 4;
        adev->gfx.config.max_gprs = 256;
        adev->gfx.config.max_gs_threads = 32;
        adev->gfx.config.max_hw_contexts = 8;

        adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
        adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
        adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
        adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
        gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
        break;
    }
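    /*
     * Note: the Polaris parts and VEGAM took their shader-engine/CU
     * topology from the vbios via amdgpu_atombios_get_gfx_info() above,
     * while the older asics hardcode it; from here on the remaining
     * config is derived the same way for every chip.
     */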
    mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
    adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
    mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

    adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
    adev->gfx.config.mem_max_burst_length_bytes = 256;
    if (adev->flags & AMD_IS_APU) {
        /* Get memory bank mapping mode. */
        tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
        dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
        dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

        tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
        dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
        dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

        /* Validate settings in case only one DIMM installed. */
        if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
            dimm00_addr_map = 0;
        if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
            dimm01_addr_map = 0;
        if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
            dimm10_addr_map = 0;
        if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
            dimm11_addr_map = 0;

        /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
        /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
        if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
            adev->gfx.config.mem_row_size_in_kb = 2;
        else
            adev->gfx.config.mem_row_size_in_kb = 1;
    } else {
        tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
        adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
        if (adev->gfx.config.mem_row_size_in_kb > 4)
            adev->gfx.config.mem_row_size_in_kb = 4;
    }

    adev->gfx.config.shader_engine_tile_size = 32;
    adev->gfx.config.num_gpus = 1;
    adev->gfx.config.multi_gpu_tile_size = 64;

    /* fix up row size */
    switch (adev->gfx.config.mem_row_size_in_kb) {
    case 1:
    default:
        gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
        break;
    case 2:
        gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
        break;
    case 4:
        gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
        break;
    }
    adev->gfx.config.gb_addr_config = gb_addr_config;

    return 0;
}
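/*
 * Worked example for the dGPU row-size math above: with NOOFCOLS = 2,
 * mem_row_size_in_kb = (4 * (1 << (8 + 2))) / 1024 = 4, which the
 * fix-up switch then encodes as ROW_SIZE = 2 in GB_ADDR_CONFIG
 * (field values 0, 1 and 2 stand for 1 KB, 2 KB and 4 KB rows).
 */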
static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
                                      int mec, int pipe, int queue)
{
    int r;
    unsigned irq_type;
    struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];

    /* mec0 is me1 */
    ring->me = mec + 1;
    ring->pipe = pipe;
    ring->queue = queue;

    ring->ring_obj = NULL;
    ring->use_doorbell = true;
    ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
    ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
            + (ring_id * GFX8_MEC_HPD_SIZE);
    sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);

    irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
            + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
            + ring->pipe;

    /* type-2 packets are deprecated on MEC, use type-3 instead */
    r = amdgpu_ring_init(adev, ring, 1024,
                         &adev->gfx.eop_irq, irq_type);
    if (r)
        return r;

    return 0;
}
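/*
 * Mapping example: for mec = 1, pipe = 2 (so ring->me = 2) with four
 * pipes per MEC, irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP +
 * (2 - 1) * 4 + 2; the per-pipe EOP sources are laid out linearly,
 * me-major and pipe-minor, and doorbells are one per ring_id.
 */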
static void gfx_v8_0_sq_irq_work_func(struct work_struct *work);

static int gfx_v8_0_sw_init(void *handle)
{
    int i, j, k, r, ring_id;
    struct amdgpu_ring *ring;
    struct amdgpu_kiq *kiq;
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    switch (adev->asic_type) {
    case CHIP_TONGA:
    case CHIP_CARRIZO:
    case CHIP_FIJI:
    case CHIP_POLARIS10:
    case CHIP_POLARIS11:
    case CHIP_POLARIS12:
    case CHIP_VEGAM:
        adev->gfx.mec.num_mec = 2;
        break;
    case CHIP_TOPAZ:
    case CHIP_STONEY:
    default:
        adev->gfx.mec.num_mec = 1;
        break;
    }

    adev->gfx.mec.num_pipe_per_mec = 4;
    adev->gfx.mec.num_queue_per_pipe = 8;

    /* EOP Event */
    r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq);
    if (r)
        return r;

    /* Privileged reg */
    r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT,
                          &adev->gfx.priv_reg_irq);
    if (r)
        return r;

    /* Privileged inst */
    r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT,
                          &adev->gfx.priv_inst_irq);
    if (r)
        return r;

    /* Add CP EDC/ECC irq */
    r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR,
                          &adev->gfx.cp_ecc_error_irq);
    if (r)
        return r;

    /* SQ interrupts. */
    r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG,
                          &adev->gfx.sq_irq);
    if (r) {
        DRM_ERROR("amdgpu_irq_add_id() for SQ failed: %d\n", r);
        return r;
    }

    INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func);
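    /*
     * SQ interrupt handling is deferred to process context through the
     * sq_work item initialized above (see gfx_v8_0_sq_irq_work_func),
     * rather than being done entirely in interrupt context.
     */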
    adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

    gfx_v8_0_scratch_init(adev);

    r = gfx_v8_0_init_microcode(adev);
    if (r) {
        DRM_ERROR("Failed to load gfx firmware!\n");
        return r;
    }

    r = gfx_v8_0_rlc_init(adev);
    if (r) {
        DRM_ERROR("Failed to init rlc BOs!\n");
        return r;
    }

    r = gfx_v8_0_mec_init(adev);
    if (r) {
        DRM_ERROR("Failed to init MEC BOs!\n");
        return r;
    }

    /* set up the gfx ring */
    for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
        ring = &adev->gfx.gfx_ring[i];
        ring->ring_obj = NULL;
        sprintf(ring->name, "gfx");
        /* no gfx doorbells on iceland */
        if (adev->asic_type != CHIP_TOPAZ) {
            ring->use_doorbell = true;
            ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
        }

        r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
                             AMDGPU_CP_IRQ_GFX_EOP);
        if (r)
            return r;
    }

    /* set up the compute queues - allocate horizontally across pipes */
    ring_id = 0;
    for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
        for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
            for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
                if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
                    continue;

                r = gfx_v8_0_compute_ring_init(adev,
                                               ring_id,
                                               i, k, j);
                if (r)
                    return r;

                ring_id++;
            }
        }
    }

    r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE);
    if (r) {
        DRM_ERROR("Failed to init KIQ BOs!\n");
        return r;
    }

    kiq = &adev->gfx.kiq;
    r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
    if (r)
        return r;

    /* create MQD for all compute queues as well as KIQ for SRIOV case */
    r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
    if (r)
        return r;

    adev->gfx.ce_ram_size = 0x8000;

    r = gfx_v8_0_gpu_early_init(adev);
    if (r)
        return r;

    return 0;
}

static int gfx_v8_0_sw_fini(void *handle)
{
    int i;
    struct amdgpu_device *adev = (struct amdgpu_device *)handle;

    amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
    amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
    amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

    for (i = 0; i < adev->gfx.num_gfx_rings; i++)
        amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
    for (i = 0; i < adev->gfx.num_compute_rings; i++)
        amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

    amdgpu_gfx_compute_mqd_sw_fini(adev);
    amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
    amdgpu_gfx_kiq_fini(adev);

    gfx_v8_0_mec_fini(adev);
    gfx_v8_0_rlc_fini(adev);
    amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
                          &adev->gfx.rlc.clear_state_gpu_addr,
                          (void **)&adev->gfx.rlc.cs_ptr);
    if ((adev->asic_type == CHIP_CARRIZO) ||
        (adev->asic_type == CHIP_STONEY)) {
        amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
                              &adev->gfx.rlc.cp_table_gpu_addr,
                              (void **)&adev->gfx.rlc.cp_table_ptr);
    }
    gfx_v8_0_free_microcode(adev);

    return 0;
}
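/*
 * Each GB_TILE_MODEn and GB_MACROTILE_MODEn value programmed below is
 * OR-composed from the shifted-field helper macros, e.g.
 *
 *     ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 *     PIPE_CONFIG(ADDR_SURF_P2) |
 *     TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
 *     MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)
 *
 * which places each field at its register shift position so the whole
 * tiling description fits in a single register write per mode.
 */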
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2200 { 2201 uint32_t *modearray, *mod2array; 2202 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2203 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2204 u32 reg_offset; 2205 2206 modearray = adev->gfx.config.tile_mode_array; 2207 mod2array = adev->gfx.config.macrotile_mode_array; 2208 2209 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2210 modearray[reg_offset] = 0; 2211 2212 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2213 mod2array[reg_offset] = 0; 2214 2215 switch (adev->asic_type) { 2216 case CHIP_TOPAZ: 2217 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2218 PIPE_CONFIG(ADDR_SURF_P2) | 2219 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2220 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2221 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2222 PIPE_CONFIG(ADDR_SURF_P2) | 2223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2224 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2225 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2226 PIPE_CONFIG(ADDR_SURF_P2) | 2227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2228 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2229 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2230 PIPE_CONFIG(ADDR_SURF_P2) | 2231 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2232 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2233 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2234 PIPE_CONFIG(ADDR_SURF_P2) | 2235 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2236 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2237 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2238 PIPE_CONFIG(ADDR_SURF_P2) | 2239 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2241 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2242 PIPE_CONFIG(ADDR_SURF_P2) | 2243 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2244 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2245 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2246 PIPE_CONFIG(ADDR_SURF_P2)); 2247 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2248 PIPE_CONFIG(ADDR_SURF_P2) | 2249 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2251 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2252 PIPE_CONFIG(ADDR_SURF_P2) | 2253 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2255 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2256 PIPE_CONFIG(ADDR_SURF_P2) | 2257 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2259 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2260 PIPE_CONFIG(ADDR_SURF_P2) | 2261 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2263 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2264 PIPE_CONFIG(ADDR_SURF_P2) | 2265 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2267 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2268 PIPE_CONFIG(ADDR_SURF_P2) | 2269 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2271 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2272 PIPE_CONFIG(ADDR_SURF_P2) | 2273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2275 modearray[18] = 
(ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2276 PIPE_CONFIG(ADDR_SURF_P2) | 2277 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2279 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2280 PIPE_CONFIG(ADDR_SURF_P2) | 2281 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2283 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2284 PIPE_CONFIG(ADDR_SURF_P2) | 2285 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2287 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2288 PIPE_CONFIG(ADDR_SURF_P2) | 2289 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2291 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2292 PIPE_CONFIG(ADDR_SURF_P2) | 2293 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2295 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2296 PIPE_CONFIG(ADDR_SURF_P2) | 2297 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2299 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2300 PIPE_CONFIG(ADDR_SURF_P2) | 2301 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2303 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2304 PIPE_CONFIG(ADDR_SURF_P2) | 2305 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2307 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2308 PIPE_CONFIG(ADDR_SURF_P2) | 2309 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2311 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2312 PIPE_CONFIG(ADDR_SURF_P2) | 2313 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2315 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2316 PIPE_CONFIG(ADDR_SURF_P2) | 2317 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2319 2320 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2323 NUM_BANKS(ADDR_SURF_8_BANK)); 2324 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2325 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2327 NUM_BANKS(ADDR_SURF_8_BANK)); 2328 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2331 NUM_BANKS(ADDR_SURF_8_BANK)); 2332 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2335 NUM_BANKS(ADDR_SURF_8_BANK)); 2336 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2339 NUM_BANKS(ADDR_SURF_8_BANK)); 2340 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2343 NUM_BANKS(ADDR_SURF_8_BANK)); 2344 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2347 NUM_BANKS(ADDR_SURF_8_BANK)); 2348 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2350 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2351 NUM_BANKS(ADDR_SURF_16_BANK)); 2352 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2355 NUM_BANKS(ADDR_SURF_16_BANK)); 2356 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2359 NUM_BANKS(ADDR_SURF_16_BANK)); 2360 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2363 NUM_BANKS(ADDR_SURF_16_BANK)); 2364 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2367 NUM_BANKS(ADDR_SURF_16_BANK)); 2368 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2371 NUM_BANKS(ADDR_SURF_16_BANK)); 2372 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2375 NUM_BANKS(ADDR_SURF_8_BANK)); 2376 2377 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2378 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2379 reg_offset != 23) 2380 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2381 2382 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2383 if (reg_offset != 7) 2384 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2385 2386 break; 2387 case CHIP_FIJI: 2388 case CHIP_VEGAM: 2389 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2390 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2391 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2392 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2393 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2394 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2395 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2397 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2398 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2401 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2402 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2403 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2404 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2405 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2406 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2409 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2410 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2413 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2414 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2417 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2418 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2419 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2420 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2421 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2422 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2423 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2424 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2425 
MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2427 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2428 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2429 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2431 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2432 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2435 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2436 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2439 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2440 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2441 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2443 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2444 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2445 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2447 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2448 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2449 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2451 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2452 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2453 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2455 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2456 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2457 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2459 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2460 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2461 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2463 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2464 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2465 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2467 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2468 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2469 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2471 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2472 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2475 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2476 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2477 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2479 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2480 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2481 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2483 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2484 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2485 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2487 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2488 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2489 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2491 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2492 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2493 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2494 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2495 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2496 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2497 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2498 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2499 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2500 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2501 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2502 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2503 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2504 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2505 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2506 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2507 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2508 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2509 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2510 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2511 2512 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2515 NUM_BANKS(ADDR_SURF_8_BANK)); 2516 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2519 NUM_BANKS(ADDR_SURF_8_BANK)); 2520 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2523 NUM_BANKS(ADDR_SURF_8_BANK)); 2524 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2527 NUM_BANKS(ADDR_SURF_8_BANK)); 2528 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2531 NUM_BANKS(ADDR_SURF_8_BANK)); 2532 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2535 NUM_BANKS(ADDR_SURF_8_BANK)); 2536 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2539 NUM_BANKS(ADDR_SURF_8_BANK)); 2540 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2543 NUM_BANKS(ADDR_SURF_8_BANK)); 2544 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2547 NUM_BANKS(ADDR_SURF_8_BANK)); 2548 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2549 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2550 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2551 NUM_BANKS(ADDR_SURF_8_BANK)); 2552 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2555 NUM_BANKS(ADDR_SURF_8_BANK)); 2556 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2559 NUM_BANKS(ADDR_SURF_8_BANK)); 2560 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2561 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2562 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2563 NUM_BANKS(ADDR_SURF_8_BANK)); 2564 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2565 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2566 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2567 NUM_BANKS(ADDR_SURF_4_BANK)); 2568 2569 for (reg_offset = 0; reg_offset < 
num_tile_mode_states; reg_offset++) 2570 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2571 2572 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2573 if (reg_offset != 7) 2574 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2575 2576 break; 2577 case CHIP_TONGA: 2578 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2579 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2580 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2581 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2582 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2583 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2584 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2585 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2586 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2587 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2588 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2589 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2590 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2591 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2592 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2593 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2594 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2595 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2596 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2597 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2598 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2599 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2600 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2601 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2602 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2603 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2604 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2605 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2606 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2607 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2608 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2609 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2610 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2611 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2612 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2613 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2614 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2615 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2616 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2617 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2618 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2619 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2620 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2621 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2622 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2624 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2625 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2626 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2627 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2628 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2629 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2630 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2631 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2632 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2633 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2634 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2635 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2636 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2637 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2638 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2639 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2640 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2641 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2642 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2643 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2644 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2645 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2646 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2647 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2648 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2649 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2650 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2651 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2652 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2653 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2654 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2655 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2656 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2657 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2658 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2659 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2660 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2661 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2662 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2663 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2664 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2665 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2666 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2667 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2668 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2669 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2670 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2671 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2672 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2673 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2674 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2675 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2676 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2677 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2678 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2679 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2680 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2681 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2682 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2683 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2684 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2685 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2686 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2687 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2688 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2689 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2690 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2691 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2692 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2693 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2694 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2695 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2696 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2697 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2698 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2700 2701 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2702 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2703 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2704 NUM_BANKS(ADDR_SURF_16_BANK)); 2705 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2706 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2707 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2708 NUM_BANKS(ADDR_SURF_16_BANK)); 2709 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2710 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2711 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2712 
NUM_BANKS(ADDR_SURF_16_BANK)); 2713 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2714 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2715 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2716 NUM_BANKS(ADDR_SURF_16_BANK)); 2717 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2718 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2719 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2720 NUM_BANKS(ADDR_SURF_16_BANK)); 2721 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2722 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2723 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2724 NUM_BANKS(ADDR_SURF_16_BANK)); 2725 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2726 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2727 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2728 NUM_BANKS(ADDR_SURF_16_BANK)); 2729 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2730 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2731 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2732 NUM_BANKS(ADDR_SURF_16_BANK)); 2733 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2734 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2735 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2736 NUM_BANKS(ADDR_SURF_16_BANK)); 2737 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2738 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2739 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2740 NUM_BANKS(ADDR_SURF_16_BANK)); 2741 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2742 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2743 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2744 NUM_BANKS(ADDR_SURF_16_BANK)); 2745 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2746 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2747 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2748 NUM_BANKS(ADDR_SURF_8_BANK)); 2749 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2750 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2751 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2752 NUM_BANKS(ADDR_SURF_4_BANK)); 2753 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2754 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2755 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2756 NUM_BANKS(ADDR_SURF_4_BANK)); 2757 2758 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2759 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2760 2761 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2762 if (reg_offset != 7) 2763 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2764 2765 break; 2766 case CHIP_POLARIS11: 2767 case CHIP_POLARIS12: 2768 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2769 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2770 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2771 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2772 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2773 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2774 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2775 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2776 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2777 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2778 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2779 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2780 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2781 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2782 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2783 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2784 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2785 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2786 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2787 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2788 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 
2789 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2790 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2791 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2792 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2793 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2794 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2795 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2796 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2797 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2798 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2799 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2800 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2801 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2802 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2803 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2804 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2805 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2806 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2807 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2808 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2809 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2810 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2811 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2812 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2814 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2815 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2816 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2817 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2818 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2819 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2820 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2821 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2822 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2823 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2824 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2825 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2826 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2827 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2828 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2830 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2831 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2832 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2833 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2834 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2835 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2836 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2837 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2838 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2839 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2840 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2842 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2843 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2844 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2845 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2846 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2847 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2848 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2849 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2850 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2851 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2852 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2853 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2854 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2855 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2856 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2857 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2858 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2859 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2860 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2861 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2862 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2863 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2864 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2865 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2866 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2867 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2868 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2869 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2870 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2871 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2872 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2874 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2875 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2876 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2877 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2878 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2879 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2880 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2881 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2882 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2883 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2884 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2885 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2886 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2887 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2888 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2889 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2890 2891 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2892 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2893 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2894 NUM_BANKS(ADDR_SURF_16_BANK)); 2895 2896 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2897 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2898 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2899 NUM_BANKS(ADDR_SURF_16_BANK)); 2900 2901 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2902 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2903 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2904 NUM_BANKS(ADDR_SURF_16_BANK)); 2905 2906 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2907 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2908 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2909 NUM_BANKS(ADDR_SURF_16_BANK)); 2910 2911 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2912 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2913 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2914 NUM_BANKS(ADDR_SURF_16_BANK)); 2915 2916 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2917 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2918 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2919 NUM_BANKS(ADDR_SURF_16_BANK)); 2920 2921 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2922 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2923 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2924 NUM_BANKS(ADDR_SURF_16_BANK)); 2925 2926 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2927 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2928 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2929 NUM_BANKS(ADDR_SURF_16_BANK)); 2930 2931 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2932 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2933 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2934 NUM_BANKS(ADDR_SURF_16_BANK)); 2935 2936 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2937 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2938 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2939 NUM_BANKS(ADDR_SURF_16_BANK)); 2940 2941 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2942 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 
2943 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2944 NUM_BANKS(ADDR_SURF_16_BANK)); 2945 2946 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2947 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2948 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2949 NUM_BANKS(ADDR_SURF_16_BANK)); 2950 2951 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2952 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2953 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2954 NUM_BANKS(ADDR_SURF_8_BANK)); 2955 2956 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2957 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2958 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2959 NUM_BANKS(ADDR_SURF_4_BANK)); 2960 2961 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2962 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2963 2964 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2965 if (reg_offset != 7) 2966 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2967 2968 break; 2969 case CHIP_POLARIS10: 2970 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2971 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2972 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2973 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2974 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2975 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2976 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2977 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2978 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2981 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2982 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2983 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2984 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2985 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2986 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2987 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2988 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2989 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2990 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2991 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2993 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2994 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2995 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2996 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2997 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2998 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2999 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3001 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3002 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3003 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 3004 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3006 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3007 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3008 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3009 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3010 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3012 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3013 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3014 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3015 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3016 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3017 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3018 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3019 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3020 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3021 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3022 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3024 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3025 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3026 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3027 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3028 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3029 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3030 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3031 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3032 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3033 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3034 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3035 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3036 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3037 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3038 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3040 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3041 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3042 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3043 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3044 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3045 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3046 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3047 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3048 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3049 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3050 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3051 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3052 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3053 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3054 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3055 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3056 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3057 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3058 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3060 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3061 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3062 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3064 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3065 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3066 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3068 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3069 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3070 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3072 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3073 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3074 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3076 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3077 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3078 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3080 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3081 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3082 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3084 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3085 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3086 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3087 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3088 modearray[30] = 
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3089 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3090 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3091 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3092 3093 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3096 NUM_BANKS(ADDR_SURF_16_BANK)); 3097 3098 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3099 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3100 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3101 NUM_BANKS(ADDR_SURF_16_BANK)); 3102 3103 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3104 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3105 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3106 NUM_BANKS(ADDR_SURF_16_BANK)); 3107 3108 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3109 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3110 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3111 NUM_BANKS(ADDR_SURF_16_BANK)); 3112 3113 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3114 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3115 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3116 NUM_BANKS(ADDR_SURF_16_BANK)); 3117 3118 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3119 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3120 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3121 NUM_BANKS(ADDR_SURF_16_BANK)); 3122 3123 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3126 NUM_BANKS(ADDR_SURF_16_BANK)); 3127 3128 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3129 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3130 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3131 NUM_BANKS(ADDR_SURF_16_BANK)); 3132 3133 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3134 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3135 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3136 NUM_BANKS(ADDR_SURF_16_BANK)); 3137 3138 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3139 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3140 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3141 NUM_BANKS(ADDR_SURF_16_BANK)); 3142 3143 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3146 NUM_BANKS(ADDR_SURF_16_BANK)); 3147 3148 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3151 NUM_BANKS(ADDR_SURF_8_BANK)); 3152 3153 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3154 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3155 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3156 NUM_BANKS(ADDR_SURF_4_BANK)); 3157 3158 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3161 NUM_BANKS(ADDR_SURF_4_BANK)); 3162 3163 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3164 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3165 3166 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3167 if (reg_offset != 7) 3168 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3169 3170 break; 3171 case CHIP_STONEY: 3172 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3173 PIPE_CONFIG(ADDR_SURF_P2) | 3174 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3175 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3176 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3177 PIPE_CONFIG(ADDR_SURF_P2) | 
3178 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3179 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3180 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3181 PIPE_CONFIG(ADDR_SURF_P2) | 3182 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3183 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3184 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3185 PIPE_CONFIG(ADDR_SURF_P2) | 3186 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3187 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3188 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3189 PIPE_CONFIG(ADDR_SURF_P2) | 3190 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3191 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3192 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3193 PIPE_CONFIG(ADDR_SURF_P2) | 3194 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3195 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3196 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3197 PIPE_CONFIG(ADDR_SURF_P2) | 3198 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3199 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3200 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3201 PIPE_CONFIG(ADDR_SURF_P2)); 3202 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3203 PIPE_CONFIG(ADDR_SURF_P2) | 3204 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3206 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3207 PIPE_CONFIG(ADDR_SURF_P2) | 3208 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3210 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3211 PIPE_CONFIG(ADDR_SURF_P2) | 3212 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3213 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3214 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3215 PIPE_CONFIG(ADDR_SURF_P2) | 3216 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3218 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3219 PIPE_CONFIG(ADDR_SURF_P2) | 3220 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3222 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3223 PIPE_CONFIG(ADDR_SURF_P2) | 3224 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3226 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3227 PIPE_CONFIG(ADDR_SURF_P2) | 3228 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3230 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3231 PIPE_CONFIG(ADDR_SURF_P2) | 3232 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3233 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3234 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3235 PIPE_CONFIG(ADDR_SURF_P2) | 3236 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3238 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3239 PIPE_CONFIG(ADDR_SURF_P2) | 3240 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3242 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3243 PIPE_CONFIG(ADDR_SURF_P2) | 3244 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3246 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3247 PIPE_CONFIG(ADDR_SURF_P2) | 3248 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3250 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3251 PIPE_CONFIG(ADDR_SURF_P2) | 
3252 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3254 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3255 PIPE_CONFIG(ADDR_SURF_P2) | 3256 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3258 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3259 PIPE_CONFIG(ADDR_SURF_P2) | 3260 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3262 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3263 PIPE_CONFIG(ADDR_SURF_P2) | 3264 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3266 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3267 PIPE_CONFIG(ADDR_SURF_P2) | 3268 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3270 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3271 PIPE_CONFIG(ADDR_SURF_P2) | 3272 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3274 3275 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3276 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3277 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3278 NUM_BANKS(ADDR_SURF_8_BANK)); 3279 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3282 NUM_BANKS(ADDR_SURF_8_BANK)); 3283 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3286 NUM_BANKS(ADDR_SURF_8_BANK)); 3287 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3288 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3289 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3290 NUM_BANKS(ADDR_SURF_8_BANK)); 3291 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3294 NUM_BANKS(ADDR_SURF_8_BANK)); 3295 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3296 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3297 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3298 NUM_BANKS(ADDR_SURF_8_BANK)); 3299 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3300 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3301 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3302 NUM_BANKS(ADDR_SURF_8_BANK)); 3303 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3306 NUM_BANKS(ADDR_SURF_16_BANK)); 3307 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3308 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3309 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3310 NUM_BANKS(ADDR_SURF_16_BANK)); 3311 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3312 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3313 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3314 NUM_BANKS(ADDR_SURF_16_BANK)); 3315 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3316 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3317 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3318 NUM_BANKS(ADDR_SURF_16_BANK)); 3319 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3320 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3321 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3322 NUM_BANKS(ADDR_SURF_16_BANK)); 3323 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3324 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3325 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3326 NUM_BANKS(ADDR_SURF_16_BANK)); 3327 mod2array[14] 
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3330 NUM_BANKS(ADDR_SURF_8_BANK)); 3331 3332 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3333 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3334 reg_offset != 23) 3335 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3336 3337 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3338 if (reg_offset != 7) 3339 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3340 3341 break; 3342 default: 3343 dev_warn(adev->dev, 3344 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3345 adev->asic_type); 3346 3347 case CHIP_CARRIZO: 3348 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3349 PIPE_CONFIG(ADDR_SURF_P2) | 3350 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3351 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3352 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3353 PIPE_CONFIG(ADDR_SURF_P2) | 3354 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3355 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3356 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3357 PIPE_CONFIG(ADDR_SURF_P2) | 3358 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3359 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3360 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3361 PIPE_CONFIG(ADDR_SURF_P2) | 3362 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3363 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3364 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3365 PIPE_CONFIG(ADDR_SURF_P2) | 3366 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3367 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3368 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3369 PIPE_CONFIG(ADDR_SURF_P2) | 3370 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3371 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3372 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3373 PIPE_CONFIG(ADDR_SURF_P2) | 3374 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3375 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3376 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3377 PIPE_CONFIG(ADDR_SURF_P2)); 3378 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3379 PIPE_CONFIG(ADDR_SURF_P2) | 3380 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3381 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3382 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3383 PIPE_CONFIG(ADDR_SURF_P2) | 3384 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3386 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3387 PIPE_CONFIG(ADDR_SURF_P2) | 3388 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3390 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3391 PIPE_CONFIG(ADDR_SURF_P2) | 3392 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3394 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3395 PIPE_CONFIG(ADDR_SURF_P2) | 3396 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3398 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3399 PIPE_CONFIG(ADDR_SURF_P2) | 3400 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3402 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3403 PIPE_CONFIG(ADDR_SURF_P2) | 3404 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3405 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3406 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3407 PIPE_CONFIG(ADDR_SURF_P2) | 3408 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3410 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3411 PIPE_CONFIG(ADDR_SURF_P2) | 3412 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3414 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3415 PIPE_CONFIG(ADDR_SURF_P2) | 3416 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3418 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3419 PIPE_CONFIG(ADDR_SURF_P2) | 3420 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3422 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3423 PIPE_CONFIG(ADDR_SURF_P2) | 3424 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3426 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3427 PIPE_CONFIG(ADDR_SURF_P2) | 3428 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3429 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3430 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3431 PIPE_CONFIG(ADDR_SURF_P2) | 3432 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3434 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3435 PIPE_CONFIG(ADDR_SURF_P2) | 3436 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3438 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3439 PIPE_CONFIG(ADDR_SURF_P2) | 3440 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3442 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3443 PIPE_CONFIG(ADDR_SURF_P2) | 3444 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3446 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3447 PIPE_CONFIG(ADDR_SURF_P2) | 3448 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3450 3451 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3454 NUM_BANKS(ADDR_SURF_8_BANK)); 3455 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3456 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3457 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3458 NUM_BANKS(ADDR_SURF_8_BANK)); 3459 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3460 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3461 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3462 NUM_BANKS(ADDR_SURF_8_BANK)); 3463 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3466 NUM_BANKS(ADDR_SURF_8_BANK)); 3467 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3468 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3469 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3470 NUM_BANKS(ADDR_SURF_8_BANK)); 3471 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3474 NUM_BANKS(ADDR_SURF_8_BANK)); 3475 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3478 NUM_BANKS(ADDR_SURF_8_BANK)); 3479 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 
3480 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3481 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3482 NUM_BANKS(ADDR_SURF_16_BANK)); 3483 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3486 NUM_BANKS(ADDR_SURF_16_BANK)); 3487 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3490 NUM_BANKS(ADDR_SURF_16_BANK)); 3491 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3492 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3493 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3494 NUM_BANKS(ADDR_SURF_16_BANK)); 3495 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3498 NUM_BANKS(ADDR_SURF_16_BANK)); 3499 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3500 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3501 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3502 NUM_BANKS(ADDR_SURF_16_BANK)); 3503 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3504 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3505 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3506 NUM_BANKS(ADDR_SURF_8_BANK)); 3507 3508 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3509 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3510 reg_offset != 23) 3511 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3512 3513 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3514 if (reg_offset != 7) 3515 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3516 3517 break; 3518 } 3519 } 3520 3521 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3522 u32 se_num, u32 sh_num, u32 instance) 3523 { 3524 u32 data; 3525 3526 if (instance == 0xffffffff) 3527 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3528 else 3529 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3530 3531 if (se_num == 0xffffffff) 3532 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3533 else 3534 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3535 3536 if (sh_num == 0xffffffff) 3537 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3538 else 3539 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3540 3541 WREG32(mmGRBM_GFX_INDEX, data); 3542 } 3543 3544 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, 3545 u32 me, u32 pipe, u32 q) 3546 { 3547 vi_srbm_select(adev, me, pipe, q, 0); 3548 } 3549 3550 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3551 { 3552 u32 data, mask; 3553 3554 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3555 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3556 3557 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3558 3559 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 3560 adev->gfx.config.max_sh_per_se); 3561 3562 return (~data) & mask; 3563 } 3564 3565 static void 3566 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3567 { 3568 switch (adev->asic_type) { 3569 case CHIP_FIJI: 3570 case CHIP_VEGAM: 3571 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3572 RB_XSEL2(1) | PKR_MAP(2) | 3573 PKR_XSEL(1) | PKR_YSEL(1) | 3574 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3575 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3576 SE_PAIR_YSEL(2); 3577 break; 3578 case CHIP_TONGA: 3579 case 
CHIP_POLARIS10: 3580 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3581 SE_XSEL(1) | SE_YSEL(1); 3582 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3583 SE_PAIR_YSEL(2); 3584 break; 3585 case CHIP_TOPAZ: 3586 case CHIP_CARRIZO: 3587 *rconf |= RB_MAP_PKR0(2); 3588 *rconf1 |= 0x0; 3589 break; 3590 case CHIP_POLARIS11: 3591 case CHIP_POLARIS12: 3592 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3593 SE_XSEL(1) | SE_YSEL(1); 3594 *rconf1 |= 0x0; 3595 break; 3596 case CHIP_STONEY: 3597 *rconf |= 0x0; 3598 *rconf1 |= 0x0; 3599 break; 3600 default: 3601 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3602 break; 3603 } 3604 } 3605 3606 static void 3607 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3608 u32 raster_config, u32 raster_config_1, 3609 unsigned rb_mask, unsigned num_rb) 3610 { 3611 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3612 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3613 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3614 unsigned rb_per_se = num_rb / num_se; 3615 unsigned se_mask[4]; 3616 unsigned se; 3617 3618 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3619 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3620 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3621 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3622 3623 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3624 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3625 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3626 3627 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3628 (!se_mask[2] && !se_mask[3]))) { 3629 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3630 3631 if (!se_mask[0] && !se_mask[1]) { 3632 raster_config_1 |= 3633 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3634 } else { 3635 raster_config_1 |= 3636 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3637 } 3638 } 3639 3640 for (se = 0; se < num_se; se++) { 3641 unsigned raster_config_se = raster_config; 3642 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3643 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3644 int idx = (se / 2) * 2; 3645 3646 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3647 raster_config_se &= ~SE_MAP_MASK; 3648 3649 if (!se_mask[idx]) { 3650 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3651 } else { 3652 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3653 } 3654 } 3655 3656 pkr0_mask &= rb_mask; 3657 pkr1_mask &= rb_mask; 3658 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3659 raster_config_se &= ~PKR_MAP_MASK; 3660 3661 if (!pkr0_mask) { 3662 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3663 } else { 3664 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3665 } 3666 } 3667 3668 if (rb_per_se >= 2) { 3669 unsigned rb0_mask = 1 << (se * rb_per_se); 3670 unsigned rb1_mask = rb0_mask << 1; 3671 3672 rb0_mask &= rb_mask; 3673 rb1_mask &= rb_mask; 3674 if (!rb0_mask || !rb1_mask) { 3675 raster_config_se &= ~RB_MAP_PKR0_MASK; 3676 3677 if (!rb0_mask) { 3678 raster_config_se |= 3679 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3680 } else { 3681 raster_config_se |= 3682 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3683 } 3684 } 3685 3686 if (rb_per_se > 2) { 3687 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3688 rb1_mask = rb0_mask << 1; 3689 rb0_mask &= rb_mask; 3690 rb1_mask &= rb_mask; 3691 if (!rb0_mask || !rb1_mask) { 3692 raster_config_se &= ~RB_MAP_PKR1_MASK; 3693 3694 if (!rb0_mask) { 3695 raster_config_se |= 3696 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3697 } else { 
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}

static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
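/*
 * Example of the bitmap packing above (hypothetical configuration, for
 * illustration only): with max_shader_engines = 4, max_sh_per_se = 1 and
 * max_backends_per_se = 4, rb_bitmap_width_per_sh is 4, so the per-SH
 * bitmap returned by gfx_v8_0_get_rb_active_bitmap() for SE0 lands in
 * bits 0-3 of active_rbs, SE1 in bits 4-7, and so on; a fully populated
 * part would end up with active_rbs = 0xffff and num_rbs = 16.
 */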
/**
 * gfx_v8_0_init_compute_vmid - init the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM registers of the compute VMIDs.
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
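/*
 * Note on the SH_MEM_BASES encoding used above: the register appears to
 * hold two 16-bit aperture selectors (private base in the low half,
 * shared base in the high half), each supplying bits 63:48 of the HSA
 * virtual address.  DEFAULT_SH_MEM_BASES = 0x6000 in both halves thus
 * matches the 0x6000xxxx'xxxxxxxx aperture layout in the comment above.
 */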
static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	default:
		adev->gfx.config.double_offchip_lds_buf = 1;
		break;
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		adev->gfx.config.double_offchip_lds_buf = 0;
		break;
	}
}

static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
	u32 tmp, sh_static_mem_cfg;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);
	gfx_v8_0_config_init(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
				   SWIZZLE_ENABLE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   ELEMENT_SIZE, 1);
	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
				   INDEX_STRIDE, 3);
	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			WREG32(mmSH_MEM_BASES, 0);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
			tmp = adev->gmc.shared_aperture_start >> 48;
			WREG32(mmSH_MEM_BASES, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcast
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

	tmp = RREG32(mmSPI_ARB_PRIORITY);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
	WREG32(mmSPI_ARB_PRIORITY, tmp);

	mutex_unlock(&adev->grbm_idx_mutex);
}
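/*
 * A recurring convention in this file: 0xffffffff passed to
 * gfx_v8_0_select_se_sh() selects broadcast rather than a specific
 * index, e.g.
 *
 *	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *
 * sets the SE/SH/INSTANCE_BROADCAST_WRITES bits in GRBM_GFX_INDEX so
 * that subsequent register writes reach every shader engine at once.
 */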
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
			if (k == adev->usec_timeout) {
				gfx_v8_0_select_se_sh(adev, 0xffffffff,
						      0xffffffff, 0xffffffff);
				mutex_unlock(&adev->grbm_idx_mutex);
				DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
					 i, j);
				return;
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}

static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for a matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		register_list_format[ind_offset] = indices;
	}
}
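/*
 * How the parser above behaves: 0xFFFFFFFF terminates an entry, entries
 * are made of three-word records whose third word is an index value,
 * and each distinct index value is collected once into unique_indices[]
 * while the list itself is rewritten to hold that value's position in
 * unique_indices[].  For example (made-up values), an input of
 * { A, B, X, 0xFFFFFFFF, C, D, X } records two entry start offsets,
 * stores X once, and replaces both occurrences of X with 0.
 */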
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				ARRAY_SIZE(unique_indices),
				indirect_start_offsets,
				&offset_count,
				ARRAY_SIZE(indirect_start_offsets));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
			adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
		if (unique_indices[i] != 0) {
			WREG32(temp + i, unique_indices[i] & 0x3FFFF);
			WREG32(data + i, unique_indices[i] >> 20);
		}
	}
	kfree(register_list_format);

	return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	/* note the inverted sense: CP_PG_DISABLE = 0 enables CP power gating */
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12) ||
		   (adev->asic_type == CHIP_VEGAM)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
	}
}
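/*
 * The RLC bring-up sequence implemented below is: halt the RLC and wait
 * for its serdes traffic to drain, pulse GRBM_SOFT_RESET, reprogram the
 * power-gating state while the RLC is quiescent, then re-enable
 * RLC_ENABLE_F32.  Callers only ever need the single entry point:
 *
 *	r = gfx_v8_0_rlc_resume(adev);	/* stop -> reset -> init_pg -> start */
 */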
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* APUs such as Carrizo enable the CP interrupt only after the CP
	 * has been initialized */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);
	gfx_v8_0_rlc_start(adev);

	return 0;
}

static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}

static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
	amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
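/*
 * The dword budget computed by gfx_v8_0_get_csb_size() mirrors the
 * packets emitted in gfx_v8_0_cp_gfx_start() above: 2 dwords for each
 * PREAMBLE_CNTL pair, 3 for CONTEXT_CONTROL, 2 + reg_count per
 * SET_CONTEXT_REG extent, 4 for the raster-config write and 2 for
 * CLEAR_STATE; the "+ 4" passed to amdgpu_ring_alloc() covers the
 * trailing SET_BASE packet for the CE partitions.
 */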
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	u32 tmp;
	/* no gfx doorbells on iceland */
	if (adev->asic_type == CHIP_TOPAZ)
		return;

	tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

	if (ring->use_doorbell) {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_HIT, 0);
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
				DOORBELL_EN, 1);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
	}

	WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

	if (adev->flags & AMD_IS_APU)
		return;

	tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
			DOORBELL_RANGE_LOWER,
			AMDGPU_DOORBELL_GFX_RING0);
	WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

	WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
		CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	gfx_v8_0_set_cpg_door_bell(adev, ring);
	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}

/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
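	/*
	 * The low byte of RLC_CP_SCHEDULERS identifies the KIQ: queue in
	 * bits 2:0, pipe in bits 4:3, ME above that.  The register is
	 * written twice below, the second time with bit 7 set, which
	 * appears to latch/activate the selection in the RLC.
	 */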
uint32_t tmp; 4409 struct amdgpu_device *adev = ring->adev; 4410 4411 /* tell RLC which is KIQ queue */ 4412 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4413 tmp &= 0xffffff00; 4414 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4415 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4416 tmp |= 0x80; 4417 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4418 } 4419 4420 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4421 { 4422 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4423 uint64_t queue_mask = 0; 4424 int r, i; 4425 4426 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4427 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4428 continue; 4429 4430 /* This situation may be hit in the future if a new HW 4431 * generation exposes more than 64 queues. If so, the 4432 * definition of queue_mask needs updating */ 4433 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4434 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 4435 break; 4436 } 4437 4438 queue_mask |= (1ull << i); 4439 } 4440 4441 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8); 4442 if (r) { 4443 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4444 return r; 4445 } 4446 /* set resources */ 4447 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4448 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4449 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4450 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4451 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4452 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4453 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4454 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4455 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4456 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4457 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4458 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4459 4460 /* map queues */ 4461 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4462 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4463 amdgpu_ring_write(kiq_ring, 4464 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4465 amdgpu_ring_write(kiq_ring, 4466 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4467 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4468 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4469 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 
0 : 1)); /* doorbell */ 4470 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4471 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4472 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4473 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4474 } 4475 4476 r = amdgpu_ring_test_ring(kiq_ring); 4477 if (r) { 4478 DRM_ERROR("KCQ enable failed\n"); 4479 kiq_ring->ready = false; 4480 } 4481 return r; 4482 } 4483 4484 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4485 { 4486 int i, r = 0; 4487 4488 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4489 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4490 for (i = 0; i < adev->usec_timeout; i++) { 4491 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4492 break; 4493 udelay(1); 4494 } 4495 if (i == adev->usec_timeout) 4496 r = -ETIMEDOUT; 4497 } 4498 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4499 WREG32(mmCP_HQD_PQ_RPTR, 0); 4500 WREG32(mmCP_HQD_PQ_WPTR, 0); 4501 4502 return r; 4503 } 4504 4505 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4506 { 4507 struct amdgpu_device *adev = ring->adev; 4508 struct vi_mqd *mqd = ring->mqd_ptr; 4509 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4510 uint32_t tmp; 4511 4512 mqd->header = 0xC0310800; 4513 mqd->compute_pipelinestat_enable = 0x00000001; 4514 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4515 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4516 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4517 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4518 mqd->compute_misc_reserved = 0x00000003; 4519 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4520 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4521 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4522 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4523 eop_base_addr = ring->eop_gpu_addr >> 8; 4524 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4525 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4526 4527 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4528 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4529 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4530 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4531 4532 mqd->cp_hqd_eop_control = tmp; 4533 4534 /* enable doorbell? */ 4535 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4536 CP_HQD_PQ_DOORBELL_CONTROL, 4537 DOORBELL_EN, 4538 ring->use_doorbell ? 
1 : 0); 4539 4540 mqd->cp_hqd_pq_doorbell_control = tmp; 4541 4542 /* set the pointer to the MQD */ 4543 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4544 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4545 4546 /* set MQD vmid to 0 */ 4547 tmp = RREG32(mmCP_MQD_CONTROL); 4548 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4549 mqd->cp_mqd_control = tmp; 4550 4551 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */ 4552 hqd_gpu_addr = ring->gpu_addr >> 8; 4553 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4554 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4555 4556 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4557 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4558 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4559 (order_base_2(ring->ring_size / 4) - 1)); 4560 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4561 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4562 #ifdef __BIG_ENDIAN 4563 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4564 #endif 4565 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4566 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4567 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4568 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4569 mqd->cp_hqd_pq_control = tmp; 4570 4571 /* set the wb address whether it's enabled or not */ 4572 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4573 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4574 mqd->cp_hqd_pq_rptr_report_addr_hi = 4575 upper_32_bits(wb_gpu_addr) & 0xffff; 4576 4577 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4578 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4579 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4580 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4581 4582 tmp = 0; 4583 /* enable the doorbell if requested */ 4584 if (ring->use_doorbell) { 4585 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4586 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4587 DOORBELL_OFFSET, ring->doorbell_index); 4588 4589 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4590 DOORBELL_EN, 1); 4591 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4592 DOORBELL_SOURCE, 0); 4593 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4594 DOORBELL_HIT, 0); 4595 } 4596 4597 mqd->cp_hqd_pq_doorbell_control = tmp; 4598 4599 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4600 ring->wptr = 0; 4601 mqd->cp_hqd_pq_wptr = ring->wptr; 4602 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4603 4604 /* set the vmid for the queue */ 4605 mqd->cp_hqd_vmid = 0; 4606 4607 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4608 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4609 mqd->cp_hqd_persistent_state = tmp; 4610 4611 /* set MTYPE */ 4612 tmp = RREG32(mmCP_HQD_IB_CONTROL); 4613 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4614 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); 4615 mqd->cp_hqd_ib_control = tmp; 4616 4617 tmp = RREG32(mmCP_HQD_IQ_TIMER); 4618 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); 4619 mqd->cp_hqd_iq_timer = tmp; 4620 4621 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); 4622 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); 4623 mqd->cp_hqd_ctx_save_control = tmp; 4624 4625 /* defaults */ 4626 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); 4627 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
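/* Note: the defaults below are snapshots of the live HQD registers rather
 * than values computed by the driver; gfx_v8_0_mqd_commit() later replays
 * every dword of this image with
 * WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]), so sampling the
 * current register state here keeps fields the driver does not manage
 * unchanged across the commit.
 */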
4628 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4629 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4630 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4631 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4632 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4633 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4634 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4635 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4636 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4637 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4638 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4639 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4640 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4641 4642 /* activate the queue */ 4643 mqd->cp_hqd_active = 1; 4644 4645 return 0; 4646 } 4647 4648 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4649 struct vi_mqd *mqd) 4650 { 4651 uint32_t mqd_reg; 4652 uint32_t *mqd_data; 4653 4654 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4655 mqd_data = &mqd->cp_mqd_base_addr_lo; 4656 4657 /* disable wptr polling */ 4658 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4659 4660 /* program all HQD registers */ 4661 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4662 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4663 4664 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4665 * This is safe since EOP RPTR==WPTR for any inactive HQD 4666 * on ASICs that do not support context-save. 4667 * EOP writes/reads can start anywhere in the ring. 4668 */ 4669 if (adev->asic_type != CHIP_TONGA) { 4670 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4671 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4672 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4673 } 4674 4675 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4676 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4677 4678 /* activate the HQD */ 4679 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4680 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4681 4682 return 0; 4683 } 4684 4685 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4686 { 4687 struct amdgpu_device *adev = ring->adev; 4688 struct vi_mqd *mqd = ring->mqd_ptr; 4689 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4690 4691 gfx_v8_0_kiq_setting(ring); 4692 4693 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4694 /* reset MQD to a clean status */ 4695 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4696 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4697 4698 /* reset ring buffer */ 4699 ring->wptr = 0; 4700 amdgpu_ring_clear_ring(ring); 4701 mutex_lock(&adev->srbm_mutex); 4702 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4703 gfx_v8_0_mqd_commit(adev, mqd); 4704 vi_srbm_select(adev, 0, 0, 0, 0); 4705 mutex_unlock(&adev->srbm_mutex); 4706 } else { 4707 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4708 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4709 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4710 mutex_lock(&adev->srbm_mutex); 4711 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4712 gfx_v8_0_mqd_init(ring); 4713 gfx_v8_0_mqd_commit(adev, mqd); 4714 vi_srbm_select(adev, 0, 0, 0, 0); 4715 mutex_unlock(&adev->srbm_mutex); 4716 4717 if 
(adev->gfx.mec.mqd_backup[mqd_idx]) 4718 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4719 } 4720 4721 return 0; 4722 } 4723 4724 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4725 { 4726 struct amdgpu_device *adev = ring->adev; 4727 struct vi_mqd *mqd = ring->mqd_ptr; 4728 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4729 4730 if (!adev->in_gpu_reset && !adev->in_suspend) { 4731 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4732 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4733 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4734 mutex_lock(&adev->srbm_mutex); 4735 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4736 gfx_v8_0_mqd_init(ring); 4737 vi_srbm_select(adev, 0, 0, 0, 0); 4738 mutex_unlock(&adev->srbm_mutex); 4739 4740 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4741 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4742 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4743 /* reset MQD to a clean status */ 4744 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4745 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4746 /* reset ring buffer */ 4747 ring->wptr = 0; 4748 amdgpu_ring_clear_ring(ring); 4749 } else { 4750 amdgpu_ring_clear_ring(ring); 4751 } 4752 return 0; 4753 } 4754 4755 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4756 { 4757 if (adev->asic_type > CHIP_TONGA) { 4758 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4759 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4760 } 4761 /* enable doorbells */ 4762 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4763 } 4764 4765 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4766 { 4767 struct amdgpu_ring *ring; 4768 int r; 4769 4770 ring = &adev->gfx.kiq.ring; 4771 4772 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4773 if (unlikely(r != 0)) 4774 return r; 4775 4776 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4777 if (unlikely(r != 0)) 4778 return r; 4779 4780 gfx_v8_0_kiq_init_queue(ring); 4781 amdgpu_bo_kunmap(ring->mqd_obj); 4782 ring->mqd_ptr = NULL; 4783 amdgpu_bo_unreserve(ring->mqd_obj); 4784 ring->ready = true; 4785 return 0; 4786 } 4787 4788 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) 4789 { 4790 struct amdgpu_ring *ring = NULL; 4791 int r = 0, i; 4792 4793 gfx_v8_0_cp_compute_enable(adev, true); 4794 4795 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4796 ring = &adev->gfx.compute_ring[i]; 4797 4798 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4799 if (unlikely(r != 0)) 4800 goto done; 4801 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4802 if (!r) { 4803 r = gfx_v8_0_kcq_init_queue(ring); 4804 amdgpu_bo_kunmap(ring->mqd_obj); 4805 ring->mqd_ptr = NULL; 4806 } 4807 amdgpu_bo_unreserve(ring->mqd_obj); 4808 if (r) 4809 goto done; 4810 } 4811 4812 gfx_v8_0_set_mec_doorbell_range(adev); 4813 4814 r = gfx_v8_0_kiq_kcq_enable(adev); 4815 if (r) 4816 goto done; 4817 4818 /* Test KCQs - reversing the order of rings seems to fix ring test failure 4819 * after GPU reset 4820 */ 4821 for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) { 4822 ring = &adev->gfx.compute_ring[i]; 4823 ring->ready = true; 4824 r = amdgpu_ring_test_ring(ring); 4825 if (r) 4826 ring->ready = false; 4827 } 4828 4829 done: 4830 return r; 4831 } 4832 4833 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4834 { 4835 int r; 4836 4837 if (!(adev->flags & 
AMD_IS_APU)) 4838 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4839 4840 r = gfx_v8_0_kiq_resume(adev); 4841 if (r) 4842 return r; 4843 4844 r = gfx_v8_0_cp_gfx_resume(adev); 4845 if (r) 4846 return r; 4847 4848 r = gfx_v8_0_kcq_resume(adev); 4849 if (r) 4850 return r; 4851 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4852 4853 return 0; 4854 } 4855 4856 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 4857 { 4858 gfx_v8_0_cp_gfx_enable(adev, enable); 4859 gfx_v8_0_cp_compute_enable(adev, enable); 4860 } 4861 4862 static int gfx_v8_0_hw_init(void *handle) 4863 { 4864 int r; 4865 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4866 4867 gfx_v8_0_init_golden_registers(adev); 4868 gfx_v8_0_constants_init(adev); 4869 4870 r = gfx_v8_0_rlc_resume(adev); 4871 if (r) 4872 return r; 4873 4874 r = gfx_v8_0_cp_resume(adev); 4875 4876 return r; 4877 } 4878 4879 static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev) 4880 { 4881 int r, i; 4882 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4883 4884 r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); 4885 if (r) 4886 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4887 4888 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4889 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4890 4891 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 4892 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 4893 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 4894 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 4895 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 4896 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 4897 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 4898 amdgpu_ring_write(kiq_ring, 0); 4899 amdgpu_ring_write(kiq_ring, 0); 4900 amdgpu_ring_write(kiq_ring, 0); 4901 } 4902 r = amdgpu_ring_test_ring(kiq_ring); 4903 if (r) 4904 DRM_ERROR("KCQ disable failed\n"); 4905 4906 return r; 4907 } 4908 4909 static bool gfx_v8_0_is_idle(void *handle) 4910 { 4911 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4912 4913 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE) 4914 || RREG32(mmGRBM_STATUS2) != 0x8) 4915 return false; 4916 else 4917 return true; 4918 } 4919 4920 static bool gfx_v8_0_rlc_is_idle(void *handle) 4921 { 4922 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4923 4924 if (RREG32(mmGRBM_STATUS2) != 0x8) 4925 return false; 4926 else 4927 return true; 4928 } 4929 4930 static int gfx_v8_0_wait_for_rlc_idle(void *handle) 4931 { 4932 unsigned int i; 4933 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4934 4935 for (i = 0; i < adev->usec_timeout; i++) { 4936 if (gfx_v8_0_rlc_is_idle(handle)) 4937 return 0; 4938 4939 udelay(1); 4940 } 4941 return -ETIMEDOUT; 4942 } 4943 4944 static int gfx_v8_0_wait_for_idle(void *handle) 4945 { 4946 unsigned int i; 4947 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4948 4949 for (i = 0; i < adev->usec_timeout; i++) { 4950 if (gfx_v8_0_is_idle(handle)) 4951 return 0; 4952 4953 udelay(1); 4954 } 4955 return -ETIMEDOUT; 4956 } 4957 4958 static int gfx_v8_0_hw_fini(void *handle) 4959 { 4960 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4961 4962 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4963 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4964 4965 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 4966 4967 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); 4968 4969 /* disable KCQ to avoid CPC touching memory that is not valid
anymore */ 4970 gfx_v8_0_kcq_disable(adev); 4971 4972 if (amdgpu_sriov_vf(adev)) { 4973 pr_debug("For SRIOV client, shouldn't do anything.\n"); 4974 return 0; 4975 } 4976 adev->gfx.rlc.funcs->enter_safe_mode(adev); 4977 if (!gfx_v8_0_wait_for_idle(adev)) 4978 gfx_v8_0_cp_enable(adev, false); 4979 else 4980 pr_err("cp is busy, skip halt cp\n"); 4981 if (!gfx_v8_0_wait_for_rlc_idle(adev)) 4982 gfx_v8_0_rlc_stop(adev); 4983 else 4984 pr_err("rlc is busy, skip halt rlc\n"); 4985 adev->gfx.rlc.funcs->exit_safe_mode(adev); 4986 return 0; 4987 } 4988 4989 static int gfx_v8_0_suspend(void *handle) 4990 { 4991 return gfx_v8_0_hw_fini(handle); 4992 } 4993 4994 static int gfx_v8_0_resume(void *handle) 4995 { 4996 return gfx_v8_0_hw_init(handle); 4997 } 4998 4999 static bool gfx_v8_0_check_soft_reset(void *handle) 5000 { 5001 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5002 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5003 u32 tmp; 5004 5005 /* GRBM_STATUS */ 5006 tmp = RREG32(mmGRBM_STATUS); 5007 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5008 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5009 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5010 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5011 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5012 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5013 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5014 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5015 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5016 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5017 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5018 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5019 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5020 } 5021 5022 /* GRBM_STATUS2 */ 5023 tmp = RREG32(mmGRBM_STATUS2); 5024 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5025 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5026 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5027 5028 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5029 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5030 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5031 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5032 SOFT_RESET_CPF, 1); 5033 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5034 SOFT_RESET_CPC, 1); 5035 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5036 SOFT_RESET_CPG, 1); 5037 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5038 SOFT_RESET_GRBM, 1); 5039 } 5040 5041 /* SRBM_STATUS */ 5042 tmp = RREG32(mmSRBM_STATUS); 5043 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5044 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5045 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5046 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5047 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5048 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5049 5050 if (grbm_soft_reset || srbm_soft_reset) { 5051 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5052 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5053 return true; 5054 } else { 5055 adev->gfx.grbm_soft_reset = 0; 5056 adev->gfx.srbm_soft_reset = 0; 5057 return false; 5058 } 5059 } 5060 5061 static int gfx_v8_0_pre_soft_reset(void *handle) 5062 { 5063 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5064 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5065 5066 if ((!adev->gfx.grbm_soft_reset) && 5067 (!adev->gfx.srbm_soft_reset)) 5068 return 0; 5069 5070 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5071 srbm_soft_reset = 
adev->gfx.srbm_soft_reset; 5072 5073 /* stop the rlc */ 5074 gfx_v8_0_rlc_stop(adev); 5075 5076 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5077 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5078 /* Disable GFX parsing/prefetching */ 5079 gfx_v8_0_cp_gfx_enable(adev, false); 5080 5081 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5082 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5083 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5084 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5085 int i; 5086 5087 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5088 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5089 5090 mutex_lock(&adev->srbm_mutex); 5091 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5092 gfx_v8_0_deactivate_hqd(adev, 2); 5093 vi_srbm_select(adev, 0, 0, 0, 0); 5094 mutex_unlock(&adev->srbm_mutex); 5095 } 5096 /* Disable MEC parsing/prefetching */ 5097 gfx_v8_0_cp_compute_enable(adev, false); 5098 } 5099 5100 return 0; 5101 } 5102 5103 static int gfx_v8_0_soft_reset(void *handle) 5104 { 5105 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5106 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5107 u32 tmp; 5108 5109 if ((!adev->gfx.grbm_soft_reset) && 5110 (!adev->gfx.srbm_soft_reset)) 5111 return 0; 5112 5113 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5114 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5115 5116 if (grbm_soft_reset || srbm_soft_reset) { 5117 tmp = RREG32(mmGMCON_DEBUG); 5118 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5119 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5120 WREG32(mmGMCON_DEBUG, tmp); 5121 udelay(50); 5122 } 5123 5124 if (grbm_soft_reset) { 5125 tmp = RREG32(mmGRBM_SOFT_RESET); 5126 tmp |= grbm_soft_reset; 5127 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5128 WREG32(mmGRBM_SOFT_RESET, tmp); 5129 tmp = RREG32(mmGRBM_SOFT_RESET); 5130 5131 udelay(50); 5132 5133 tmp &= ~grbm_soft_reset; 5134 WREG32(mmGRBM_SOFT_RESET, tmp); 5135 tmp = RREG32(mmGRBM_SOFT_RESET); 5136 } 5137 5138 if (srbm_soft_reset) { 5139 tmp = RREG32(mmSRBM_SOFT_RESET); 5140 tmp |= srbm_soft_reset; 5141 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5142 WREG32(mmSRBM_SOFT_RESET, tmp); 5143 tmp = RREG32(mmSRBM_SOFT_RESET); 5144 5145 udelay(50); 5146 5147 tmp &= ~srbm_soft_reset; 5148 WREG32(mmSRBM_SOFT_RESET, tmp); 5149 tmp = RREG32(mmSRBM_SOFT_RESET); 5150 } 5151 5152 if (grbm_soft_reset || srbm_soft_reset) { 5153 tmp = RREG32(mmGMCON_DEBUG); 5154 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5155 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5156 WREG32(mmGMCON_DEBUG, tmp); 5157 } 5158 5159 /* Wait a little for things to settle down */ 5160 udelay(50); 5161 5162 return 0; 5163 } 5164 5165 static int gfx_v8_0_post_soft_reset(void *handle) 5166 { 5167 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5168 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5169 5170 if ((!adev->gfx.grbm_soft_reset) && 5171 (!adev->gfx.srbm_soft_reset)) 5172 return 0; 5173 5174 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5175 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5176 5177 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5178 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5179 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5180 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5181 int i; 5182 5183 for (i 
= 0; i < adev->gfx.num_compute_rings; i++) { 5184 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5185 5186 mutex_lock(&adev->srbm_mutex); 5187 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5188 gfx_v8_0_deactivate_hqd(adev, 2); 5189 vi_srbm_select(adev, 0, 0, 0, 0); 5190 mutex_unlock(&adev->srbm_mutex); 5191 } 5192 gfx_v8_0_kiq_resume(adev); 5193 gfx_v8_0_kcq_resume(adev); 5194 } 5195 5196 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5197 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5198 gfx_v8_0_cp_gfx_resume(adev); 5199 5200 gfx_v8_0_rlc_start(adev); 5201 5202 return 0; 5203 } 5204 5205 /** 5206 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5207 * 5208 * @adev: amdgpu_device pointer 5209 * 5210 * Fetches a GPU clock counter snapshot. 5211 * Returns the 64 bit clock counter snapshot. 5212 */ 5213 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5214 { 5215 uint64_t clock; 5216 5217 mutex_lock(&adev->gfx.gpu_clock_mutex); 5218 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5219 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5220 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5221 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5222 return clock; 5223 } 5224 5225 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5226 uint32_t vmid, 5227 uint32_t gds_base, uint32_t gds_size, 5228 uint32_t gws_base, uint32_t gws_size, 5229 uint32_t oa_base, uint32_t oa_size) 5230 { 5231 /* GDS Base */ 5232 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5233 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5234 WRITE_DATA_DST_SEL(0))); 5235 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5236 amdgpu_ring_write(ring, 0); 5237 amdgpu_ring_write(ring, gds_base); 5238 5239 /* GDS Size */ 5240 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5241 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5242 WRITE_DATA_DST_SEL(0))); 5243 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5244 amdgpu_ring_write(ring, 0); 5245 amdgpu_ring_write(ring, gds_size); 5246 5247 /* GWS */ 5248 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5249 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5250 WRITE_DATA_DST_SEL(0))); 5251 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5252 amdgpu_ring_write(ring, 0); 5253 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5254 5255 /* OA */ 5256 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5257 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5258 WRITE_DATA_DST_SEL(0))); 5259 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5260 amdgpu_ring_write(ring, 0); 5261 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5262 } 5263 5264 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5265 { 5266 WREG32(mmSQ_IND_INDEX, 5267 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5268 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5269 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5270 (SQ_IND_INDEX__FORCE_READ_MASK)); 5271 return RREG32(mmSQ_IND_DATA); 5272 } 5273 5274 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5275 uint32_t wave, uint32_t thread, 5276 uint32_t regno, uint32_t num, uint32_t *out) 5277 { 5278 WREG32(mmSQ_IND_INDEX, 5279 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5280 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5281 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5282 
(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5283 (SQ_IND_INDEX__FORCE_READ_MASK) | 5284 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5285 while (num--) 5286 *(out++) = RREG32(mmSQ_IND_DATA); 5287 } 5288 5289 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5290 { 5291 /* type 0 wave data */ 5292 dst[(*no_fields)++] = 0; 5293 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5294 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5295 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5296 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5297 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5298 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5299 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5300 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5301 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5302 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5303 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5304 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5305 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5306 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5307 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5308 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5309 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5310 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5311 } 5312 5313 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5314 uint32_t wave, uint32_t start, 5315 uint32_t size, uint32_t *dst) 5316 { 5317 wave_read_regs( 5318 adev, simd, wave, 0, 5319 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5320 } 5321 5322 5323 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5324 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5325 .select_se_sh = &gfx_v8_0_select_se_sh, 5326 .read_wave_data = &gfx_v8_0_read_wave_data, 5327 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5328 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q 5329 }; 5330 5331 static int gfx_v8_0_early_init(void *handle) 5332 { 5333 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5334 5335 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5336 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5337 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5338 gfx_v8_0_set_ring_funcs(adev); 5339 gfx_v8_0_set_irq_funcs(adev); 5340 gfx_v8_0_set_gds_init(adev); 5341 gfx_v8_0_set_rlc_funcs(adev); 5342 5343 return 0; 5344 } 5345 5346 static int gfx_v8_0_late_init(void *handle) 5347 { 5348 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5349 int r; 5350 5351 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5352 if (r) 5353 return r; 5354 5355 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5356 if (r) 5357 return r; 5358 5359 /* requires IBs so do in late init after IB pool is initialized */ 5360 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5361 if (r) 5362 return r; 5363 5364 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 5365 if (r) { 5366 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); 5367 return r; 5368 } 5369 5370 r = 
amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); 5371 if (r) { 5372 DRM_ERROR( 5373 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", 5374 r); 5375 return r; 5376 } 5377 5378 return 0; 5379 } 5380 5381 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5382 bool enable) 5383 { 5384 if (((adev->asic_type == CHIP_POLARIS11) || 5385 (adev->asic_type == CHIP_POLARIS12) || 5386 (adev->asic_type == CHIP_VEGAM)) && 5387 adev->powerplay.pp_funcs->set_powergating_by_smu) 5388 /* Send msg to SMU via Powerplay */ 5389 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); 5390 5391 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5392 } 5393 5394 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5395 bool enable) 5396 { 5397 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5398 } 5399 5400 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5401 bool enable) 5402 { 5403 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5404 } 5405 5406 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5407 bool enable) 5408 { 5409 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5410 } 5411 5412 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5413 bool enable) 5414 { 5415 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5416 5417 /* Read any GFX register to wake up GFX. */ 5418 if (!enable) 5419 RREG32(mmDB_RENDER_CONTROL); 5420 } 5421 5422 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5423 bool enable) 5424 { 5425 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5426 cz_enable_gfx_cg_power_gating(adev, true); 5427 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5428 cz_enable_gfx_pipeline_power_gating(adev, true); 5429 } else { 5430 cz_enable_gfx_cg_power_gating(adev, false); 5431 cz_enable_gfx_pipeline_power_gating(adev, false); 5432 } 5433 } 5434 5435 static int gfx_v8_0_set_powergating_state(void *handle, 5436 enum amd_powergating_state state) 5437 { 5438 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5439 bool enable = (state == AMD_PG_STATE_GATE); 5440 5441 if (amdgpu_sriov_vf(adev)) 5442 return 0; 5443 5444 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5445 AMD_PG_SUPPORT_RLC_SMU_HS | 5446 AMD_PG_SUPPORT_CP | 5447 AMD_PG_SUPPORT_GFX_DMG)) 5448 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5449 switch (adev->asic_type) { 5450 case CHIP_CARRIZO: 5451 case CHIP_STONEY: 5452 5453 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5454 cz_enable_sck_slow_down_on_power_up(adev, true); 5455 cz_enable_sck_slow_down_on_power_down(adev, true); 5456 } else { 5457 cz_enable_sck_slow_down_on_power_up(adev, false); 5458 cz_enable_sck_slow_down_on_power_down(adev, false); 5459 } 5460 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5461 cz_enable_cp_power_gating(adev, true); 5462 else 5463 cz_enable_cp_power_gating(adev, false); 5464 5465 cz_update_gfx_cg_power_gating(adev, enable); 5466 5467 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5468 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5469 else 5470 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5471 5472 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5473 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5474 else 5475 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5476 break; 5477 case CHIP_POLARIS11: 5478 case CHIP_POLARIS12: 5479 
case CHIP_VEGAM: 5480 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5481 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5482 else 5483 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5484 5485 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5486 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5487 else 5488 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5489 5490 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5491 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5492 else 5493 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5494 break; 5495 default: 5496 break; 5497 } 5498 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5499 AMD_PG_SUPPORT_RLC_SMU_HS | 5500 AMD_PG_SUPPORT_CP | 5501 AMD_PG_SUPPORT_GFX_DMG)) 5502 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5503 return 0; 5504 } 5505 5506 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5507 { 5508 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5509 int data; 5510 5511 if (amdgpu_sriov_vf(adev)) 5512 *flags = 0; 5513 5514 /* AMD_CG_SUPPORT_GFX_MGCG */ 5515 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5516 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5517 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5518 5519 /* AMD_CG_SUPPORT_GFX_CGCG */ 5520 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5521 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5522 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5523 5524 /* AMD_CG_SUPPORT_GFX_CGLS */ 5525 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5526 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5527 5528 /* AMD_CG_SUPPORT_GFX_CGTS */ 5529 data = RREG32(mmCGTS_SM_CTRL_REG); 5530 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5531 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5532 5533 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5534 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5535 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5536 5537 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5538 data = RREG32(mmRLC_MEM_SLP_CNTL); 5539 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5540 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5541 5542 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5543 data = RREG32(mmCP_MEM_SLP_CNTL); 5544 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5545 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5546 } 5547 5548 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5549 uint32_t reg_addr, uint32_t cmd) 5550 { 5551 uint32_t data; 5552 5553 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5554 5555 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5556 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5557 5558 data = RREG32(mmRLC_SERDES_WR_CTRL); 5559 if (adev->asic_type == CHIP_STONEY) 5560 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5561 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5562 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5563 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5564 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5565 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5566 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5567 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5568 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5569 else 5570 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5571 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5572 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5573 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5574 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5575 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5576 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5577 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5578 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5579
RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5580 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5581 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5582 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5583 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5584 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5585 5586 WREG32(mmRLC_SERDES_WR_CTRL, data); 5587 } 5588 5589 #define MSG_ENTER_RLC_SAFE_MODE 1 5590 #define MSG_EXIT_RLC_SAFE_MODE 0 5591 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5592 #define RLC_GPR_REG2__REQ__SHIFT 0 5593 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5594 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5595 5596 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5597 { 5598 u32 data; 5599 unsigned i; 5600 5601 data = RREG32(mmRLC_CNTL); 5602 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5603 return; 5604 5605 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5606 data |= RLC_SAFE_MODE__CMD_MASK; 5607 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5608 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5609 WREG32(mmRLC_SAFE_MODE, data); 5610 5611 for (i = 0; i < adev->usec_timeout; i++) { 5612 if ((RREG32(mmRLC_GPM_STAT) & 5613 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5614 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5615 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5616 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5617 break; 5618 udelay(1); 5619 } 5620 5621 for (i = 0; i < adev->usec_timeout; i++) { 5622 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5623 break; 5624 udelay(1); 5625 } 5626 adev->gfx.rlc.in_safe_mode = true; 5627 } 5628 } 5629 5630 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5631 { 5632 u32 data = 0; 5633 unsigned i; 5634 5635 data = RREG32(mmRLC_CNTL); 5636 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5637 return; 5638 5639 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5640 if (adev->gfx.rlc.in_safe_mode) { 5641 data |= RLC_SAFE_MODE__CMD_MASK; 5642 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5643 WREG32(mmRLC_SAFE_MODE, data); 5644 adev->gfx.rlc.in_safe_mode = false; 5645 } 5646 } 5647 5648 for (i = 0; i < adev->usec_timeout; i++) { 5649 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5650 break; 5651 udelay(1); 5652 } 5653 } 5654 5655 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5656 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5657 .exit_safe_mode = iceland_exit_rlc_safe_mode 5658 }; 5659 5660 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5661 bool enable) 5662 { 5663 uint32_t temp, data; 5664 5665 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5666 5667 /* It is disabled by HW by default */ 5668 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5669 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5670 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5671 /* 1 - RLC memory Light sleep */ 5672 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5673 5674 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5675 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5676 } 5677 5678 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5679 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5680 if (adev->flags & AMD_IS_APU) 5681 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5682 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5683 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5684 else 5685 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5686 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5687 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5688 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5689 5690 if 
(temp != data) 5691 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5692 5693 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5694 gfx_v8_0_wait_for_rlc_serdes(adev); 5695 5696 /* 5 - clear mgcg override */ 5697 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5698 5699 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5700 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5701 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5702 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5703 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5704 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5705 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5706 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5707 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5708 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5709 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5710 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5711 if (temp != data) 5712 WREG32(mmCGTS_SM_CTRL_REG, data); 5713 } 5714 udelay(50); 5715 5716 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5717 gfx_v8_0_wait_for_rlc_serdes(adev); 5718 } else { 5719 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5720 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5721 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5722 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5723 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5724 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5725 if (temp != data) 5726 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5727 5728 /* 2 - disable MGLS in RLC */ 5729 data = RREG32(mmRLC_MEM_SLP_CNTL); 5730 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5731 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5732 WREG32(mmRLC_MEM_SLP_CNTL, data); 5733 } 5734 5735 /* 3 - disable MGLS in CP */ 5736 data = RREG32(mmCP_MEM_SLP_CNTL); 5737 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5738 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5739 WREG32(mmCP_MEM_SLP_CNTL, data); 5740 } 5741 5742 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5743 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5744 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5745 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5746 if (temp != data) 5747 WREG32(mmCGTS_SM_CTRL_REG, data); 5748 5749 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5750 gfx_v8_0_wait_for_rlc_serdes(adev); 5751 5752 /* 6 - set mgcg override */ 5753 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5754 5755 udelay(50); 5756 5757 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5758 gfx_v8_0_wait_for_rlc_serdes(adev); 5759 } 5760 5761 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5762 } 5763 5764 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5765 bool enable) 5766 { 5767 uint32_t temp, temp1, data, data1; 5768 5769 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5770 5771 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5772 5773 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5774 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5775 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5776 if (temp1 != data1) 5777 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5778 5779 /* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5780 gfx_v8_0_wait_for_rlc_serdes(adev); 5781 5782 /* 2 - clear cgcg override */ 5783 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5784 5785 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5786
gfx_v8_0_wait_for_rlc_serdes(adev); 5787 5788 /* 3 - write cmd to set CGLS */ 5789 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 5790 5791 /* 4 - enable cgcg */ 5792 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5793 5794 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5795 /* enable cgls */ 5796 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5797 5798 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5799 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 5800 5801 if (temp1 != data1) 5802 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5803 } else { 5804 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5805 } 5806 5807 if (temp != data) 5808 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5809 5810 /* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/ 5811 * Cmp_busy/GFX_Idle interrupts 5812 */ 5813 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5814 } else { 5815 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 5816 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5817 5818 /* TEST CGCG */ 5819 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5820 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 5821 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 5822 if (temp1 != data1) 5823 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5824 5825 /* read gfx register to wake up cgcg */ 5826 RREG32(mmCB_CGTT_SCLK_CTRL); 5827 RREG32(mmCB_CGTT_SCLK_CTRL); 5828 RREG32(mmCB_CGTT_SCLK_CTRL); 5829 RREG32(mmCB_CGTT_SCLK_CTRL); 5830 5831 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5832 gfx_v8_0_wait_for_rlc_serdes(adev); 5833 5834 /* write cmd to Set CGCG Override */ 5835 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5836 5837 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5838 gfx_v8_0_wait_for_rlc_serdes(adev); 5839 5840 /* write cmd to Clear CGLS */ 5841 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 5842 5843 /* disable cgcg, cgls should be disabled too.
*/ 5844 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 5845 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5846 if (temp != data) 5847 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5848 /* enable interrupts again for PG */ 5849 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5850 } 5851 5852 gfx_v8_0_wait_for_rlc_serdes(adev); 5853 5854 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5855 } 5856 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5857 bool enable) 5858 { 5859 if (enable) { 5860 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 5861 * === MGCG + MGLS + TS(CG/LS) === 5862 */ 5863 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5864 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5865 } else { 5866 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 5867 * === CGCG + CGLS === 5868 */ 5869 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5870 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5871 } 5872 return 0; 5873 } 5874 5875 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 5876 enum amd_clockgating_state state) 5877 { 5878 uint32_t msg_id, pp_state = 0; 5879 uint32_t pp_support_state = 0; 5880 5881 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 5882 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5883 pp_support_state = PP_STATE_SUPPORT_LS; 5884 pp_state = PP_STATE_LS; 5885 } 5886 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5887 pp_support_state |= PP_STATE_SUPPORT_CG; 5888 pp_state |= PP_STATE_CG; 5889 } 5890 if (state == AMD_CG_STATE_UNGATE) 5891 pp_state = 0; 5892 5893 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5894 PP_BLOCK_GFX_CG, 5895 pp_support_state, 5896 pp_state); 5897 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5898 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5899 } 5900 5901 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 5902 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5903 pp_support_state = PP_STATE_SUPPORT_LS; 5904 pp_state = PP_STATE_LS; 5905 } 5906 5907 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5908 pp_support_state |= PP_STATE_SUPPORT_CG; 5909 pp_state |= PP_STATE_CG; 5910 } 5911 5912 if (state == AMD_CG_STATE_UNGATE) 5913 pp_state = 0; 5914 5915 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5916 PP_BLOCK_GFX_MG, 5917 pp_support_state, 5918 pp_state); 5919 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5920 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5921 } 5922 5923 return 0; 5924 } 5925 5926 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 5927 enum amd_clockgating_state state) 5928 { 5929 5930 uint32_t msg_id, pp_state = 0; 5931 uint32_t pp_support_state = 0; 5932 5933 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 5934 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5935 pp_support_state = PP_STATE_SUPPORT_LS; 5936 pp_state = PP_STATE_LS; 5937 } 5938 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5939 pp_support_state |= PP_STATE_SUPPORT_CG; 5940 pp_state |= PP_STATE_CG; 5941 } 5942 if (state == AMD_CG_STATE_UNGATE) 5943 pp_state = 0; 5944 5945 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5946 PP_BLOCK_GFX_CG, 5947 pp_support_state, 5948 pp_state); 5949 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5950 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5951 } 5952 5953 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 5954 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5955 pp_support_state = 
PP_STATE_SUPPORT_LS; 5956 pp_state = PP_STATE_LS; 5957 } 5958 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5959 pp_support_state |= PP_STATE_SUPPORT_CG; 5960 pp_state |= PP_STATE_CG; 5961 } 5962 if (state == AMD_CG_STATE_UNGATE) 5963 pp_state = 0; 5964 5965 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5966 PP_BLOCK_GFX_3D, 5967 pp_support_state, 5968 pp_state); 5969 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5970 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5971 } 5972 5973 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 5974 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5975 pp_support_state = PP_STATE_SUPPORT_LS; 5976 pp_state = PP_STATE_LS; 5977 } 5978 5979 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5980 pp_support_state |= PP_STATE_SUPPORT_CG; 5981 pp_state |= PP_STATE_CG; 5982 } 5983 5984 if (state == AMD_CG_STATE_UNGATE) 5985 pp_state = 0; 5986 5987 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5988 PP_BLOCK_GFX_MG, 5989 pp_support_state, 5990 pp_state); 5991 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 5992 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5993 } 5994 5995 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 5996 pp_support_state = PP_STATE_SUPPORT_LS; 5997 5998 if (state == AMD_CG_STATE_UNGATE) 5999 pp_state = 0; 6000 else 6001 pp_state = PP_STATE_LS; 6002 6003 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6004 PP_BLOCK_GFX_RLC, 6005 pp_support_state, 6006 pp_state); 6007 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6008 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6009 } 6010 6011 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6012 pp_support_state = PP_STATE_SUPPORT_LS; 6013 6014 if (state == AMD_CG_STATE_UNGATE) 6015 pp_state = 0; 6016 else 6017 pp_state = PP_STATE_LS; 6018 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6019 PP_BLOCK_GFX_CP, 6020 pp_support_state, 6021 pp_state); 6022 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6023 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6024 } 6025 6026 return 0; 6027 } 6028 6029 static int gfx_v8_0_set_clockgating_state(void *handle, 6030 enum amd_clockgating_state state) 6031 { 6032 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6033 6034 if (amdgpu_sriov_vf(adev)) 6035 return 0; 6036 6037 switch (adev->asic_type) { 6038 case CHIP_FIJI: 6039 case CHIP_CARRIZO: 6040 case CHIP_STONEY: 6041 gfx_v8_0_update_gfx_clock_gating(adev, 6042 state == AMD_CG_STATE_GATE); 6043 break; 6044 case CHIP_TONGA: 6045 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6046 break; 6047 case CHIP_POLARIS10: 6048 case CHIP_POLARIS11: 6049 case CHIP_POLARIS12: 6050 case CHIP_VEGAM: 6051 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6052 break; 6053 default: 6054 break; 6055 } 6056 return 0; 6057 } 6058 6059 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6060 { 6061 return ring->adev->wb.wb[ring->rptr_offs]; 6062 } 6063 6064 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6065 { 6066 struct amdgpu_device *adev = ring->adev; 6067 6068 if (ring->use_doorbell) 6069 /* XXX check if swapping is necessary on BE */ 6070 return ring->adev->wb.wb[ring->wptr_offs]; 6071 else 6072 return RREG32(mmCP_RB0_WPTR); 6073 } 6074 6075 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6076 { 6077 struct amdgpu_device *adev = ring->adev; 6078 6079 if (ring->use_doorbell) { 6080 /* XXX check if swapping is necessary on BE */ 6081 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6082 WDOORBELL32(ring->doorbell_index, 
lower_32_bits(ring->wptr)); 6083 } else { 6084 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6085 (void)RREG32(mmCP_RB0_WPTR); 6086 } 6087 } 6088 6089 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6090 { 6091 u32 ref_and_mask, reg_mem_engine; 6092 6093 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6094 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6095 switch (ring->me) { 6096 case 1: 6097 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6098 break; 6099 case 2: 6100 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6101 break; 6102 default: 6103 return; 6104 } 6105 reg_mem_engine = 0; 6106 } else { 6107 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6108 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6109 } 6110 6111 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6112 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6113 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6114 reg_mem_engine)); 6115 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6116 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6117 amdgpu_ring_write(ring, ref_and_mask); 6118 amdgpu_ring_write(ring, ref_and_mask); 6119 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6120 } 6121 6122 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6123 { 6124 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6125 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6126 EVENT_INDEX(4)); 6127 6128 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6129 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6130 EVENT_INDEX(0)); 6131 } 6132 6133 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6134 struct amdgpu_ib *ib, 6135 unsigned vmid, bool ctx_switch) 6136 { 6137 u32 header, control = 0; 6138 6139 if (ib->flags & AMDGPU_IB_FLAG_CE) 6140 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6141 else 6142 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6143 6144 control |= ib->length_dw | (vmid << 24); 6145 6146 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 6147 control |= INDIRECT_BUFFER_PRE_ENB(1); 6148 6149 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 6150 gfx_v8_0_ring_emit_de_meta(ring); 6151 } 6152 6153 amdgpu_ring_write(ring, header); 6154 amdgpu_ring_write(ring, 6155 #ifdef __BIG_ENDIAN 6156 (2 << 0) | 6157 #endif 6158 (ib->gpu_addr & 0xFFFFFFFC)); 6159 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6160 amdgpu_ring_write(ring, control); 6161 } 6162 6163 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6164 struct amdgpu_ib *ib, 6165 unsigned vmid, bool ctx_switch) 6166 { 6167 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 6168 6169 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6170 amdgpu_ring_write(ring, 6171 #ifdef __BIG_ENDIAN 6172 (2 << 0) | 6173 #endif 6174 (ib->gpu_addr & 0xFFFFFFFC)); 6175 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6176 amdgpu_ring_write(ring, control); 6177 } 6178 6179 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6180 u64 seq, unsigned flags) 6181 { 6182 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6183 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6184 6185 /* EVENT_WRITE_EOP - flush caches, send int */ 6186 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6187 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6188 EOP_TC_ACTION_EN | 6189 EOP_TC_WB_ACTION_EN | 6190 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6191 
EVENT_INDEX(5))); 6192 amdgpu_ring_write(ring, addr & 0xfffffffc); 6193 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6194 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6195 amdgpu_ring_write(ring, lower_32_bits(seq)); 6196 amdgpu_ring_write(ring, upper_32_bits(seq)); 6197 6198 } 6199 6200 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6201 { 6202 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6203 uint32_t seq = ring->fence_drv.sync_seq; 6204 uint64_t addr = ring->fence_drv.gpu_addr; 6205 6206 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6207 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 6208 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 6209 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 6210 amdgpu_ring_write(ring, addr & 0xfffffffc); 6211 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 6212 amdgpu_ring_write(ring, seq); 6213 amdgpu_ring_write(ring, 0xffffffff); 6214 amdgpu_ring_write(ring, 4); /* poll interval */ 6215 } 6216 6217 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6218 unsigned vmid, uint64_t pd_addr) 6219 { 6220 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6221 6222 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 6223 6224 /* wait for the invalidate to complete */ 6225 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6226 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 6227 WAIT_REG_MEM_FUNCTION(0) | /* always */ 6228 WAIT_REG_MEM_ENGINE(0))); /* me */ 6229 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6230 amdgpu_ring_write(ring, 0); 6231 amdgpu_ring_write(ring, 0); /* ref */ 6232 amdgpu_ring_write(ring, 0); /* mask */ 6233 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6234 6235 /* compute doesn't have PFP */ 6236 if (usepfp) { 6237 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 6238 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6239 amdgpu_ring_write(ring, 0x0); 6240 } 6241 } 6242 6243 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 6244 { 6245 return ring->adev->wb.wb[ring->wptr_offs]; 6246 } 6247 6248 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 6249 { 6250 struct amdgpu_device *adev = ring->adev; 6251 6252 /* XXX check if swapping is necessary on BE */ 6253 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6254 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6255 } 6256 6257 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 6258 bool acquire) 6259 { 6260 struct amdgpu_device *adev = ring->adev; 6261 int pipe_num, tmp, reg; 6262 int pipe_percent = acquire ? 
SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 6263 6264 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 6265 6266 /* first me only has 2 entries, GFX and HP3D */ 6267 if (ring->me > 0) 6268 pipe_num -= 2; 6269 6270 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; 6271 tmp = RREG32(reg); 6272 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 6273 WREG32(reg, tmp); 6274 } 6275 6276 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, 6277 struct amdgpu_ring *ring, 6278 bool acquire) 6279 { 6280 int i, pipe; 6281 bool reserve; 6282 struct amdgpu_ring *iring; 6283 6284 mutex_lock(&adev->gfx.pipe_reserve_mutex); 6285 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); 6286 if (acquire) 6287 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6288 else 6289 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6290 6291 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 6292 /* Clear all reservations - everyone reacquires all resources */ 6293 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 6294 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 6295 true); 6296 6297 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 6298 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 6299 true); 6300 } else { 6301 /* Lower all pipes without a current reservation */ 6302 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 6303 iring = &adev->gfx.gfx_ring[i]; 6304 pipe = amdgpu_gfx_queue_to_bit(adev, 6305 iring->me, 6306 iring->pipe, 6307 0); 6308 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6309 gfx_v8_0_ring_set_pipe_percent(iring, reserve); 6310 } 6311 6312 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 6313 iring = &adev->gfx.compute_ring[i]; 6314 pipe = amdgpu_gfx_queue_to_bit(adev, 6315 iring->me, 6316 iring->pipe, 6317 0); 6318 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6319 gfx_v8_0_ring_set_pipe_percent(iring, reserve); 6320 } 6321 } 6322 6323 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 6324 } 6325 6326 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, 6327 struct amdgpu_ring *ring, 6328 bool acquire) 6329 { 6330 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 6331 uint32_t queue_priority = acquire ? 
0xf : 0x0; 6332 6333 mutex_lock(&adev->srbm_mutex); 6334 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6335 6336 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority); 6337 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority); 6338 6339 vi_srbm_select(adev, 0, 0, 0, 0); 6340 mutex_unlock(&adev->srbm_mutex); 6341 } 6342 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, 6343 enum drm_sched_priority priority) 6344 { 6345 struct amdgpu_device *adev = ring->adev; 6346 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 6347 6348 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 6349 return; 6350 6351 gfx_v8_0_hqd_set_priority(adev, ring, acquire); 6352 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire); 6353 } 6354 6355 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 6356 u64 addr, u64 seq, 6357 unsigned flags) 6358 { 6359 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6360 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6361 6362 /* RELEASE_MEM - flush caches, send int */ 6363 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 6364 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6365 EOP_TC_ACTION_EN | 6366 EOP_TC_WB_ACTION_EN | 6367 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6368 EVENT_INDEX(5))); 6369 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6370 amdgpu_ring_write(ring, addr & 0xfffffffc); 6371 amdgpu_ring_write(ring, upper_32_bits(addr)); 6372 amdgpu_ring_write(ring, lower_32_bits(seq)); 6373 amdgpu_ring_write(ring, upper_32_bits(seq)); 6374 } 6375 6376 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 6377 u64 seq, unsigned int flags) 6378 { 6379 /* we only allocate 32bit for each seq wb address */ 6380 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6381 6382 /* write fence seq to the "addr" */ 6383 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6384 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6385 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6386 amdgpu_ring_write(ring, lower_32_bits(addr)); 6387 amdgpu_ring_write(ring, upper_32_bits(addr)); 6388 amdgpu_ring_write(ring, lower_32_bits(seq)); 6389 6390 if (flags & AMDGPU_FENCE_FLAG_INT) { 6391 /* set register to trigger INT */ 6392 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6393 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6394 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6395 amdgpu_ring_write(ring, mmCPC_INT_STATUS); 6396 amdgpu_ring_write(ring, 0); 6397 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6398 } 6399 } 6400 6401 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) 6402 { 6403 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 6404 amdgpu_ring_write(ring, 0); 6405 } 6406 6407 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 6408 { 6409 uint32_t dw2 = 0; 6410 6411 if (amdgpu_sriov_vf(ring->adev)) 6412 gfx_v8_0_ring_emit_ce_meta(ring); 6413 6414 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6415 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6416 gfx_v8_0_ring_emit_vgt_flush(ring); 6417 /* set load_global_config & load_global_uconfig */ 6418 dw2 |= 0x8001; 6419 /* set load_cs_sh_regs */ 6420 dw2 |= 0x01000000; 6421 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6422 dw2 |= 0x10002; 6423 6424 /* set load_ce_ram if preamble presented */ 6425 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 6426 dw2 |= 0x10000000; 6427 } else { 6428 /* still load_ce_ram if this is the first time preamble presented 6429 * 
even though no context switch actually happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	/* issue an SQ command targeting only the waves of the hung VMID */
	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}

static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
6536 */ 6537 6538 if (me == 1) { 6539 switch (pipe) { 6540 case 0: 6541 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; 6542 break; 6543 case 1: 6544 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; 6545 break; 6546 case 2: 6547 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; 6548 break; 6549 case 3: 6550 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; 6551 break; 6552 default: 6553 DRM_DEBUG("invalid pipe %d\n", pipe); 6554 return; 6555 } 6556 } else { 6557 DRM_DEBUG("invalid me %d\n", me); 6558 return; 6559 } 6560 6561 switch (state) { 6562 case AMDGPU_IRQ_STATE_DISABLE: 6563 mec_int_cntl = RREG32(mec_int_cntl_reg); 6564 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; 6565 WREG32(mec_int_cntl_reg, mec_int_cntl); 6566 break; 6567 case AMDGPU_IRQ_STATE_ENABLE: 6568 mec_int_cntl = RREG32(mec_int_cntl_reg); 6569 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; 6570 WREG32(mec_int_cntl_reg, mec_int_cntl); 6571 break; 6572 default: 6573 break; 6574 } 6575 } 6576 6577 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6578 struct amdgpu_irq_src *source, 6579 unsigned type, 6580 enum amdgpu_interrupt_state state) 6581 { 6582 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, 6583 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6584 6585 return 0; 6586 } 6587 6588 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6589 struct amdgpu_irq_src *source, 6590 unsigned type, 6591 enum amdgpu_interrupt_state state) 6592 { 6593 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, 6594 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6595 6596 return 0; 6597 } 6598 6599 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6600 struct amdgpu_irq_src *src, 6601 unsigned type, 6602 enum amdgpu_interrupt_state state) 6603 { 6604 switch (type) { 6605 case AMDGPU_CP_IRQ_GFX_EOP: 6606 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 6607 break; 6608 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6609 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6610 break; 6611 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6612 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6613 break; 6614 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6615 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6616 break; 6617 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6618 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6619 break; 6620 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6621 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6622 break; 6623 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6624 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6625 break; 6626 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6627 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6628 break; 6629 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6630 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6631 break; 6632 default: 6633 break; 6634 } 6635 return 0; 6636 } 6637 6638 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev, 6639 struct amdgpu_irq_src *source, 6640 unsigned int type, 6641 enum amdgpu_interrupt_state state) 6642 { 6643 int enable_flag; 6644 6645 switch (state) { 6646 case AMDGPU_IRQ_STATE_DISABLE: 6647 enable_flag = 0; 6648 break; 6649 6650 case AMDGPU_IRQ_STATE_ENABLE: 6651 enable_flag = 1; 6652 break; 6653 6654 default: 6655 return -EINVAL; 6656 } 6657 6658 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6659 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, 
enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	/*
	 * SQ_INTERRUPT_MSG_CTRL.STALL blocks SQ interrupt messages, so the
	 * written value is the inverse of the requested interrupt state.
	 */
	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}
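/*
 * CP end-of-pipe interrupt handler.  The ring_id field of the IV entry
 * encodes which ME/pipe/queue raised the interrupt; decode it and run
 * fence processing on the matching ring.
 */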
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting
			 * from VI; the interrupt can only be enabled/disabled
			 * per pipe, not per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
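/*
 * Decode and log an SQ interrupt payload.  Encoding 0 is an
 * auto-generated "general purpose" interrupt; encodings 1 and 2 are
 * per-wave instruction traps and EDC/ECC errors.  For the latter two,
 * the EDC source register can additionally be read back when running
 * in task context (see gfx_v8_0_sq_irq_work_func below).
 */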
static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data)
{
	u32 enc, se_id, sh_id, cu_id;
	char type[20];
	int sq_edc_source = -1;

	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
			 );
		break;
	case 1:
	case 2:
		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from the ISR
		 * or from the bottom half; only in task (BH) context is it
		 * safe to read the per-CU SQ_EDC_INFO register instance.
		 */
		if (in_task()) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO(
			"SQ %s detected: "
			"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d "
			"trap %s, sq_edc_info.source %s.\n",
			type, se_id, sh_id, cu_id,
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
			);
		break;
	default:
		DRM_ERROR("SQ invalid encoding type.\n");
	}
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data);
}

static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from the BH.
	 * If the previous work submission hasn't finished yet, just print
	 * whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if counting 16 IBs in */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 +  /* double SWITCH_BUFFER,
			the first COND_EXEC jumps to the place just
			prior to this double SWITCH_BUFFER */
		5 +  /* COND_EXEC */
		7 +  /* HDP_flush */
		4 +  /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 +  /* CNTX_CTRL */
		5 +  /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, 6929 .emit_wreg = gfx_v8_0_ring_emit_wreg, 6930 .soft_recovery = gfx_v8_0_ring_soft_recovery, 6931 }; 6932 6933 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 6934 .type = AMDGPU_RING_TYPE_COMPUTE, 6935 .align_mask = 0xff, 6936 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6937 .support_64bit_ptrs = false, 6938 .get_rptr = gfx_v8_0_ring_get_rptr, 6939 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 6940 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 6941 .emit_frame_size = 6942 20 + /* gfx_v8_0_ring_emit_gds_switch */ 6943 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 6944 5 + /* hdp_invalidate */ 6945 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 6946 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 6947 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 6948 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 6949 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 6950 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 6951 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 6952 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 6953 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 6954 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 6955 .test_ring = gfx_v8_0_ring_test_ring, 6956 .test_ib = gfx_v8_0_ring_test_ib, 6957 .insert_nop = amdgpu_ring_insert_nop, 6958 .pad_ib = amdgpu_ring_generic_pad_ib, 6959 .set_priority = gfx_v8_0_ring_set_priority_compute, 6960 .emit_wreg = gfx_v8_0_ring_emit_wreg, 6961 }; 6962 6963 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 6964 .type = AMDGPU_RING_TYPE_KIQ, 6965 .align_mask = 0xff, 6966 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 6967 .support_64bit_ptrs = false, 6968 .get_rptr = gfx_v8_0_ring_get_rptr, 6969 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 6970 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 6971 .emit_frame_size = 6972 20 + /* gfx_v8_0_ring_emit_gds_switch */ 6973 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 6974 5 + /* hdp_invalidate */ 6975 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 6976 17 + /* gfx_v8_0_ring_emit_vm_flush */ 6977 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 6978 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 6979 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 6980 .emit_fence = gfx_v8_0_ring_emit_fence_kiq, 6981 .test_ring = gfx_v8_0_ring_test_ring, 6982 .test_ib = gfx_v8_0_ring_test_ib, 6983 .insert_nop = amdgpu_ring_insert_nop, 6984 .pad_ib = amdgpu_ring_generic_pad_ib, 6985 .emit_rreg = gfx_v8_0_ring_emit_rreg, 6986 .emit_wreg = gfx_v8_0_ring_emit_wreg, 6987 }; 6988 6989 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 6990 { 6991 int i; 6992 6993 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq; 6994 6995 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6996 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 6997 6998 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6999 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 7000 } 7001 7002 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 7003 .set = gfx_v8_0_set_eop_interrupt_state, 7004 .process = gfx_v8_0_eop_irq, 7005 }; 7006 7007 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 7008 .set = gfx_v8_0_set_priv_reg_fault_state, 7009 .process = gfx_v8_0_priv_reg_irq, 7010 }; 7011 7012 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 7013 .set = gfx_v8_0_set_priv_inst_fault_state, 7014 .process = gfx_v8_0_priv_inst_irq, 7015 }; 7016 7017 static 
const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC GDS info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
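/*
 * Walk every shader engine / shader array, harvest the active-CU
 * bitmaps and derive the always-on (AO) CU mask reported to the rest
 * of the driver and to userspace.
 */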
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				WRITE_DATA_DST_SEL(8) |
				WR_CONFIRM) |
				WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}

static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				WRITE_DATA_DST_SEL(8) |
WR_CONFIRM) | 7234 WRITE_DATA_CACHE_POLICY(0)); 7235 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr)); 7236 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); 7237 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); 7238 } 7239