1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 */ 23 #include <linux/firmware.h> 24 #include "drmP.h" 25 #include "amdgpu.h" 26 #include "amdgpu_gfx.h" 27 #include "vi.h" 28 #include "vid.h" 29 #include "amdgpu_ucode.h" 30 #include "amdgpu_atombios.h" 31 #include "atombios_i2c.h" 32 #include "clearstate_vi.h" 33 34 #include "gmc/gmc_8_2_d.h" 35 #include "gmc/gmc_8_2_sh_mask.h" 36 37 #include "oss/oss_3_0_d.h" 38 #include "oss/oss_3_0_sh_mask.h" 39 40 #include "bif/bif_5_0_d.h" 41 #include "bif/bif_5_0_sh_mask.h" 42 43 #include "gca/gfx_8_0_d.h" 44 #include "gca/gfx_8_0_enum.h" 45 #include "gca/gfx_8_0_sh_mask.h" 46 #include "gca/gfx_8_0_enum.h" 47 48 #include "dce/dce_10_0_d.h" 49 #include "dce/dce_10_0_sh_mask.h" 50 51 #include "smu/smu_7_1_3_d.h" 52 53 #define GFX8_NUM_GFX_RINGS 1 54 #define GFX8_NUM_COMPUTE_RINGS 8 55 56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 60 61 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) 62 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) 63 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) 64 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) 65 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT) 66 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT) 67 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT) 68 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT) 69 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT) 70 71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L 72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L 73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L 74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L 75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L 76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 
0x00000020L 77 78 /* BPM SERDES CMD */ 79 #define SET_BPM_SERDES_CMD 1 80 #define CLE_BPM_SERDES_CMD 0 81 82 /* BPM Register Address*/ 83 enum { 84 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */ 85 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */ 86 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */ 87 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 88 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 89 BPM_REG_FGCG_MAX 90 }; 91 92 #define RLC_FormatDirectRegListLength 14 93 94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); 95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); 97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 100 101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin"); 102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin"); 103 MODULE_FIRMWARE("amdgpu/stoney_me.bin"); 104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin"); 105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin"); 106 107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 109 MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin"); 112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin"); 113 114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); 115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); 116 MODULE_FIRMWARE("amdgpu/topaz_me.bin"); 117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); 118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); 119 120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); 121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin"); 122 MODULE_FIRMWARE("amdgpu/fiji_me.bin"); 123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 126 127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); 128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); 129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); 130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); 131 
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); 132 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); 133 134 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); 135 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); 136 MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); 137 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); 138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); 139 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); 140 141 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 142 { 143 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, 144 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1}, 145 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2}, 146 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3}, 147 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4}, 148 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5}, 149 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6}, 150 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7}, 151 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8}, 152 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9}, 153 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10}, 154 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11}, 155 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12}, 156 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13}, 157 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14}, 158 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15} 159 }; 160 161 static const u32 golden_settings_tonga_a11[] = 162 { 163 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, 164 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 165 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 166 mmGB_GPU_ID, 0x0000000f, 0x00000000, 167 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 168 
mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc, 169 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 170 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 171 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 172 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 173 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 174 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb, 175 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b, 176 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876, 177 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 178 }; 179 180 static const u32 tonga_golden_common_all[] = 181 { 182 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 183 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 184 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 185 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 186 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 187 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 188 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 189 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 190 }; 191 192 static const u32 tonga_mgcg_cgcg_init[] = 193 { 194 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 195 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 196 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 197 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 198 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 199 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 200 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 201 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 202 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 203 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 204 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 205 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 206 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 207 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 208 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 209 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 210 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 211 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 212 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 213 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 
0x00000100, 214 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 215 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 216 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 217 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 218 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 219 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 220 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 221 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 222 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 223 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 224 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 225 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 226 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 227 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 228 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 229 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 230 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 231 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 232 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 233 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 234 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 235 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 236 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 237 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 238 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 239 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 240 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 241 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 242 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 243 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 244 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 245 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 246 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 247 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 248 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 249 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 250 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 251 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 
0x00030002, 252 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 253 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 254 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 255 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 256 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 257 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 258 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 259 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 260 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 261 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 262 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 263 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 264 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 265 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 266 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 267 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 268 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 269 }; 270 271 static const u32 golden_settings_polaris11_a11[] = 272 { 273 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, 274 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 275 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 276 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 277 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 278 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 279 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 280 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 281 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 282 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 283 mmSQ_CONFIG, 0x07f80000, 0x01180000, 284 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 285 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 286 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, 287 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 288 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, 289 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 290 }; 291 292 static const u32 polaris11_golden_common_all[] = 293 { 294 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 295 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, 296 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 
0x00000800, 297 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 298 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 299 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 300 }; 301 302 static const u32 golden_settings_polaris10_a11[] = 303 { 304 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, 305 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, 306 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 307 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 308 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 309 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 310 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 311 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 312 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, 313 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 314 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 315 mmSQ_CONFIG, 0x07f80000, 0x07180000, 316 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 317 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 318 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 319 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 320 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 321 }; 322 323 static const u32 polaris10_golden_common_all[] = 324 { 325 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 326 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 327 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 328 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 329 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 330 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 331 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 332 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 333 }; 334 335 static const u32 fiji_golden_common_all[] = 336 { 337 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 338 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a, 339 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e, 340 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 341 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 342 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 343 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 344 
mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 345 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 346 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, 347 }; 348 349 static const u32 golden_settings_fiji_a10[] = 350 { 351 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 352 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 353 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 354 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 355 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 356 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 357 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 358 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 359 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 360 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff, 361 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 362 }; 363 364 static const u32 fiji_mgcg_cgcg_init[] = 365 { 366 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 367 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 368 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 369 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 370 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 371 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 372 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 373 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 374 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 375 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 376 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 377 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 378 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 379 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 380 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 381 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 382 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 383 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 384 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 385 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 386 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 387 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 388 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 389 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 390 mmCGTT_VGT_CLK_CTRL, 
0xffffffff, 0x06000100, 391 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 392 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 393 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 394 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 395 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 396 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 397 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 398 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 399 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 400 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 401 }; 402 403 static const u32 golden_settings_iceland_a11[] = 404 { 405 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 406 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 407 mmDB_DEBUG3, 0xc0000000, 0xc0000000, 408 mmGB_GPU_ID, 0x0000000f, 0x00000000, 409 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 410 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 411 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, 412 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 413 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 414 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 415 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 416 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 417 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 418 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1, 419 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 420 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010, 421 }; 422 423 static const u32 iceland_golden_common_all[] = 424 { 425 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 426 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 427 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 428 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 429 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 430 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 431 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 432 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 433 }; 434 435 static const u32 iceland_mgcg_cgcg_init[] = 436 { 437 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 438 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 439 
mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 440 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 441 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100, 442 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100, 443 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100, 444 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 445 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 446 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 447 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 448 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 449 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 450 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 451 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 452 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 453 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 454 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 455 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 456 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 457 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 458 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 459 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100, 460 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 461 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 462 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 463 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 464 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 465 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 466 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 467 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 468 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 469 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 470 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 471 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 472 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 473 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 474 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 475 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 476 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 477 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 478 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 
479 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 480 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 481 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 482 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 483 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 484 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 485 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 486 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 487 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 488 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 489 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 490 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 491 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 492 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 493 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 494 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 495 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 496 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 497 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 498 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 499 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 500 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 501 }; 502 503 static const u32 cz_golden_settings_a11[] = 504 { 505 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 506 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 507 mmGB_GPU_ID, 0x0000000f, 0x00000000, 508 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, 509 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 510 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 511 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 512 mmTA_CNTL_AUX, 0x000f000f, 0x00010000, 513 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 514 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 515 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, 516 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 517 }; 518 519 static const u32 cz_golden_common_all[] = 520 { 521 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 522 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 523 mmPA_SC_RASTER_CONFIG_1, 
0xffffffff, 0x00000000, 524 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 525 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 526 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 527 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 528 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 529 }; 530 531 static const u32 cz_mgcg_cgcg_init[] = 532 { 533 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 534 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 535 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 536 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 537 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 538 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 539 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100, 540 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 541 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 542 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 543 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 544 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 545 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 546 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 547 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 548 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 549 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 550 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 551 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 552 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 553 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 554 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 555 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 556 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 557 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 558 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 559 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 560 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 561 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 562 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 563 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 564 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 565 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 566 
mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 567 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 568 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 569 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 570 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 571 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 572 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 573 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 574 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 575 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 576 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 577 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 578 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 579 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 580 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 581 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 582 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 583 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 584 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 585 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 586 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 587 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 588 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 589 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 590 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 591 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 592 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 593 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 594 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 595 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 596 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 597 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 598 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 599 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 600 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 601 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 602 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 
603 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 604 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 605 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 606 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 607 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 608 }; 609 610 static const u32 stoney_golden_settings_a11[] = 611 { 612 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 613 mmGB_GPU_ID, 0x0000000f, 0x00000000, 614 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 615 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 616 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 617 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 618 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 619 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 620 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, 621 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, 622 }; 623 624 static const u32 stoney_golden_common_all[] = 625 { 626 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 627 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000, 628 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 629 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, 630 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 631 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 632 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 633 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 634 }; 635 636 static const u32 stoney_mgcg_cgcg_init[] = 637 { 638 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 639 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 640 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 641 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 642 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200, 643 }; 644 645 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); 646 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); 647 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); 648 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); 649 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); 650 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); 651 
/*
 * Program the per-ASIC "golden" register settings.
 *
 * For each supported VI-family ASIC this pushes up to three register
 * sequences (clockgating init, golden settings, common settings) from the
 * static tables above via amdgpu_program_register_sequence().  Unknown ASIC
 * types are silently ignored.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * Board-specific quirk: for revision 0xc7 parts with these
		 * exact subsystem vendor/device IDs, issue two raw atombios
		 * I2C transactions (presumably a board/VBIOS workaround —
		 * TODO(review): confirm intent against hardware errata).
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

/*
 * Initialize the GFX scratch-register allocator: expose 7 scratch
 * registers starting at mmSCRATCH_REG0 and mark them all free.
 */
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
		adev->gfx.scratch.free[i] = true;
		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
	}
}

/*
 * Basic CP ring smoke test.
 *
 * Seeds a scratch register with 0xCAFEDEAD, then submits a
 * SET_UCONFIG_REG packet on the ring that writes 0xDEADBEEF to the same
 * register, and polls (up to adev->usec_timeout microseconds) until the
 * write lands.  Returns 0 on success, -EINVAL on timeout, or the error
 * from scratch/ring allocation.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* 3 dwords: packet header, register offset, value to write */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* Busy-poll for the CP to perform the register write. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

/*
 * Indirect-buffer (IB) smoke test: same scratch-register scheme as the
 * ring test, but the SET_UCONFIG_REG write is submitted through a 3-dword
 * IB and completion is detected by waiting (with @timeout) on the
 * resulting fence rather than by polling.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp ==
0xDEADBEEF) { 837 DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 838 r = 0; 839 } else { 840 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 841 scratch, tmp); 842 r = -EINVAL; 843 } 844 err2: 845 amdgpu_ib_free(adev, &ib, NULL); 846 fence_put(f); 847 err1: 848 amdgpu_gfx_scratch_free(adev, scratch); 849 return r; 850 } 851 852 853 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { 854 release_firmware(adev->gfx.pfp_fw); 855 adev->gfx.pfp_fw = NULL; 856 release_firmware(adev->gfx.me_fw); 857 adev->gfx.me_fw = NULL; 858 release_firmware(adev->gfx.ce_fw); 859 adev->gfx.ce_fw = NULL; 860 release_firmware(adev->gfx.rlc_fw); 861 adev->gfx.rlc_fw = NULL; 862 release_firmware(adev->gfx.mec_fw); 863 adev->gfx.mec_fw = NULL; 864 if ((adev->asic_type != CHIP_STONEY) && 865 (adev->asic_type != CHIP_TOPAZ)) 866 release_firmware(adev->gfx.mec2_fw); 867 adev->gfx.mec2_fw = NULL; 868 869 kfree(adev->gfx.rlc.register_list_format); 870 } 871 872 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 873 { 874 const char *chip_name; 875 char fw_name[30]; 876 int err; 877 struct amdgpu_firmware_info *info = NULL; 878 const struct common_firmware_header *header = NULL; 879 const struct gfx_firmware_header_v1_0 *cp_hdr; 880 const struct rlc_firmware_header_v2_0 *rlc_hdr; 881 unsigned int *tmp = NULL, i; 882 883 DRM_DEBUG("\n"); 884 885 switch (adev->asic_type) { 886 case CHIP_TOPAZ: 887 chip_name = "topaz"; 888 break; 889 case CHIP_TONGA: 890 chip_name = "tonga"; 891 break; 892 case CHIP_CARRIZO: 893 chip_name = "carrizo"; 894 break; 895 case CHIP_FIJI: 896 chip_name = "fiji"; 897 break; 898 case CHIP_POLARIS11: 899 chip_name = "polaris11"; 900 break; 901 case CHIP_POLARIS10: 902 chip_name = "polaris10"; 903 break; 904 case CHIP_STONEY: 905 chip_name = "stoney"; 906 break; 907 default: 908 BUG(); 909 } 910 911 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 912 err = request_firmware(&adev->gfx.pfp_fw, fw_name, 
adev->dev); 913 if (err) 914 goto out; 915 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 916 if (err) 917 goto out; 918 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 919 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 920 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 921 922 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 923 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 924 if (err) 925 goto out; 926 err = amdgpu_ucode_validate(adev->gfx.me_fw); 927 if (err) 928 goto out; 929 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 930 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 931 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 932 933 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 934 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 935 if (err) 936 goto out; 937 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 938 if (err) 939 goto out; 940 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 941 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 942 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 943 944 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 945 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 946 if (err) 947 goto out; 948 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 949 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 950 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 951 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 952 953 adev->gfx.rlc.save_and_restore_offset = 954 le32_to_cpu(rlc_hdr->save_and_restore_offset); 955 adev->gfx.rlc.clear_state_descriptor_offset = 956 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 957 
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	/* one allocation holds both the format list and, right after it,
	 * the restore list (register_restore points into this buffer) */
	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	/* copy the format array out of the firmware image, converting from LE */
	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	/* copy the restore array out of the firmware image */
	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/* mec2 is optional and not provided for STONEY/TOPAZ; a missing
	 * image is tolerated (err reset to 0), but a corrupt one is fatal */
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	/* when the SMU loads the ucode, register each image in the
	 * adev->firmware.ucode[] table and grow the total page-aligned size */
	if (adev->firmware.smu_load) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	/* on any failure release every image acquired so far */
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

/*
 * Emit the RLC clear-state buffer (CSB) into @buffer as little-endian
 * PM4 packets: preamble begin, context control, all SECT_CONTEXT register
 * extents from adev->gfx.rlc.cs_data, a per-ASIC PA_SC_RASTER_CONFIG
 * pair, preamble end, and a CLEAR_STATE packet.  No-op if either the
 * cs_data table or @buffer is NULL.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are emitted */
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values */
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

/*
 * Copy the CP jump tables (jt_offset/jt_size from each gfx firmware
 * header) into the mapped cp_table BO, packed back-to-back in the order
 * ce, pfp, me, mec[, mec2 on Carrizo where max_me is bumped to 5].
 * Caller must have the cp_table BO reserved and kmapped
 * (adev->gfx.rlc.cp_table_ptr valid).
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

/*
 * Free the RLC clear-state buffer and CP jump-table buffer objects
 * (reserve -> unpin -> unreserve -> unref for each, if allocated).
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

/*
 * Allocate, pin and fill the RLC clear-state buffer (contents generated
 * by gfx_v8_0_get_csb_buffer()), and on Carrizo/Stoney also the CP
 * jump-table buffer (filled by cz_init_cp_jump_table()).  Both BOs live
 * in VRAM.  Returns 0 on success or a negative error code after cleanup.
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

/* Free the MEC HPD EOP buffer object, if allocated. */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

#define MEC_HPD_SIZE 2048

/*
 * Allocate, pin (GTT) and zero the MEC HPD EOP buffer and set the
 * mec/pipe/queue counts.  Returns 0 on success; on error the partially
 * initialized state is torn down via gfx_v8_0_mec_fini().
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

/* Raw GFX8 shader machine code that touches VGPRs; consumed by the EDC
 * workaround in gfx_v8_0_do_edc_gpr_workarounds() below. */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

/* Raw GFX8 shader machine code that touches SGPRs; consumed by the EDC
 * workaround in gfx_v8_0_do_edc_gpr_workarounds() below. */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

/* (register, value) pairs programmed via SET_SH_REG before the VGPR
 * dispatch of the EDC workaround */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/* (register, value) pairs for the first SGPR dispatch (SE0 CU mask 0x0f) */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/* (register, value) pairs for the second SGPR dispatch (SE0 CU mask 0xf0) */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/* EDC (SEC/DED) counter registers; read back at the end of the
 * workaround to clear them */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

/*
 * Carrizo-only EDC workaround: build one IB holding the VGPR/SGPR init
 * shaders plus three SET_SH_REG + DISPATCH_DIRECT sequences, run it on
 * compute ring 0, then re-enable EDC modes and read back the SEC/DED
 * counters to clear them.  No-op (returns 0) on other ASICs or when the
 * compute ring is not ready.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* size the IB: 3 dwords per reg pair + PGM address + dispatch +
	 * event-write for each of the three passes, then the two shaders */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] =
			vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* restore/enable EDC after the workaround dispatches completed */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);

	return r;
}

/*
 * Fill adev->gfx.config with per-ASIC shader-engine/pipe/CU limits and
 * pick the golden GB_ADDR_CONFIG value.  (Definition continues beyond
 * this view.)
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh
= 16; 1766 adev->gfx.config.max_sh_per_se = 1; 1767 adev->gfx.config.max_backends_per_se = 4; 1768 adev->gfx.config.max_texture_channel_caches = 16; 1769 adev->gfx.config.max_gprs = 256; 1770 adev->gfx.config.max_gs_threads = 32; 1771 adev->gfx.config.max_hw_contexts = 8; 1772 1773 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1774 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1775 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1776 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1777 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1778 break; 1779 case CHIP_POLARIS11: 1780 ret = amdgpu_atombios_get_gfx_info(adev); 1781 if (ret) 1782 return ret; 1783 adev->gfx.config.max_gprs = 256; 1784 adev->gfx.config.max_gs_threads = 32; 1785 adev->gfx.config.max_hw_contexts = 8; 1786 1787 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1788 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1789 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1790 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1791 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1792 break; 1793 case CHIP_POLARIS10: 1794 ret = amdgpu_atombios_get_gfx_info(adev); 1795 if (ret) 1796 return ret; 1797 adev->gfx.config.max_gprs = 256; 1798 adev->gfx.config.max_gs_threads = 32; 1799 adev->gfx.config.max_hw_contexts = 8; 1800 1801 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1802 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1803 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1804 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1805 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1806 break; 1807 case CHIP_TONGA: 1808 adev->gfx.config.max_shader_engines = 4; 1809 adev->gfx.config.max_tile_pipes = 8; 1810 adev->gfx.config.max_cu_per_sh = 8; 1811 adev->gfx.config.max_sh_per_se = 1; 1812 adev->gfx.config.max_backends_per_se = 2; 1813 adev->gfx.config.max_texture_channel_caches = 8; 1814 adev->gfx.config.max_gprs = 256; 1815 adev->gfx.config.max_gs_threads = 32; 1816 
adev->gfx.config.max_hw_contexts = 8; 1817 1818 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1819 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1820 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1821 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1822 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1823 break; 1824 case CHIP_CARRIZO: 1825 adev->gfx.config.max_shader_engines = 1; 1826 adev->gfx.config.max_tile_pipes = 2; 1827 adev->gfx.config.max_sh_per_se = 1; 1828 adev->gfx.config.max_backends_per_se = 2; 1829 1830 switch (adev->pdev->revision) { 1831 case 0xc4: 1832 case 0x84: 1833 case 0xc8: 1834 case 0xcc: 1835 case 0xe1: 1836 case 0xe3: 1837 /* B10 */ 1838 adev->gfx.config.max_cu_per_sh = 8; 1839 break; 1840 case 0xc5: 1841 case 0x81: 1842 case 0x85: 1843 case 0xc9: 1844 case 0xcd: 1845 case 0xe2: 1846 case 0xe4: 1847 /* B8 */ 1848 adev->gfx.config.max_cu_per_sh = 6; 1849 break; 1850 case 0xc6: 1851 case 0xca: 1852 case 0xce: 1853 case 0x88: 1854 /* B6 */ 1855 adev->gfx.config.max_cu_per_sh = 6; 1856 break; 1857 case 0xc7: 1858 case 0x87: 1859 case 0xcb: 1860 case 0xe5: 1861 case 0x89: 1862 default: 1863 /* B4 */ 1864 adev->gfx.config.max_cu_per_sh = 4; 1865 break; 1866 } 1867 1868 adev->gfx.config.max_texture_channel_caches = 2; 1869 adev->gfx.config.max_gprs = 256; 1870 adev->gfx.config.max_gs_threads = 32; 1871 adev->gfx.config.max_hw_contexts = 8; 1872 1873 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1874 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1875 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1876 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1877 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1878 break; 1879 case CHIP_STONEY: 1880 adev->gfx.config.max_shader_engines = 1; 1881 adev->gfx.config.max_tile_pipes = 2; 1882 adev->gfx.config.max_sh_per_se = 1; 1883 adev->gfx.config.max_backends_per_se = 1; 1884 1885 switch (adev->pdev->revision) { 1886 case 0xc0: 1887 case 0xc1: 1888 case 0xc2: 1889 case 0xc4: 1890 
case 0xc8: 1891 case 0xc9: 1892 adev->gfx.config.max_cu_per_sh = 3; 1893 break; 1894 case 0xd0: 1895 case 0xd1: 1896 case 0xd2: 1897 default: 1898 adev->gfx.config.max_cu_per_sh = 2; 1899 break; 1900 } 1901 1902 adev->gfx.config.max_texture_channel_caches = 2; 1903 adev->gfx.config.max_gprs = 256; 1904 adev->gfx.config.max_gs_threads = 16; 1905 adev->gfx.config.max_hw_contexts = 8; 1906 1907 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1908 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1909 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1910 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1911 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1912 break; 1913 default: 1914 adev->gfx.config.max_shader_engines = 2; 1915 adev->gfx.config.max_tile_pipes = 4; 1916 adev->gfx.config.max_cu_per_sh = 2; 1917 adev->gfx.config.max_sh_per_se = 1; 1918 adev->gfx.config.max_backends_per_se = 2; 1919 adev->gfx.config.max_texture_channel_caches = 4; 1920 adev->gfx.config.max_gprs = 256; 1921 adev->gfx.config.max_gs_threads = 32; 1922 adev->gfx.config.max_hw_contexts = 8; 1923 1924 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1925 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1926 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1927 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1928 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1929 break; 1930 } 1931 1932 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1933 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1934 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1935 1936 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1937 adev->gfx.config.mem_max_burst_length_bytes = 256; 1938 if (adev->flags & AMD_IS_APU) { 1939 /* Get memory bank mapping mode. 
*/ 1940 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 1941 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1942 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1943 1944 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 1945 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1946 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1947 1948 /* Validate settings in case only one DIMM installed. */ 1949 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 1950 dimm00_addr_map = 0; 1951 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 1952 dimm01_addr_map = 0; 1953 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 1954 dimm10_addr_map = 0; 1955 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 1956 dimm11_addr_map = 0; 1957 1958 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 1959 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. 
*/ 1960 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1961 adev->gfx.config.mem_row_size_in_kb = 2; 1962 else 1963 adev->gfx.config.mem_row_size_in_kb = 1; 1964 } else { 1965 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1966 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1967 if (adev->gfx.config.mem_row_size_in_kb > 4) 1968 adev->gfx.config.mem_row_size_in_kb = 4; 1969 } 1970 1971 adev->gfx.config.shader_engine_tile_size = 32; 1972 adev->gfx.config.num_gpus = 1; 1973 adev->gfx.config.multi_gpu_tile_size = 64; 1974 1975 /* fix up row size */ 1976 switch (adev->gfx.config.mem_row_size_in_kb) { 1977 case 1: 1978 default: 1979 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1980 break; 1981 case 2: 1982 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1983 break; 1984 case 4: 1985 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1986 break; 1987 } 1988 adev->gfx.config.gb_addr_config = gb_addr_config; 1989 1990 return 0; 1991 } 1992 1993 static int gfx_v8_0_sw_init(void *handle) 1994 { 1995 int i, r; 1996 struct amdgpu_ring *ring; 1997 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1998 1999 /* EOP Event */ 2000 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); 2001 if (r) 2002 return r; 2003 2004 /* Privileged reg */ 2005 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq); 2006 if (r) 2007 return r; 2008 2009 /* Privileged inst */ 2010 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq); 2011 if (r) 2012 return r; 2013 2014 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2015 2016 gfx_v8_0_scratch_init(adev); 2017 2018 r = gfx_v8_0_init_microcode(adev); 2019 if (r) { 2020 DRM_ERROR("Failed to load gfx firmware!\n"); 2021 return r; 2022 } 2023 2024 r = gfx_v8_0_rlc_init(adev); 2025 if (r) { 2026 DRM_ERROR("Failed to init rlc BOs!\n"); 2027 return 
r; 2028 } 2029 2030 r = gfx_v8_0_mec_init(adev); 2031 if (r) { 2032 DRM_ERROR("Failed to init MEC BOs!\n"); 2033 return r; 2034 } 2035 2036 /* set up the gfx ring */ 2037 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2038 ring = &adev->gfx.gfx_ring[i]; 2039 ring->ring_obj = NULL; 2040 sprintf(ring->name, "gfx"); 2041 /* no gfx doorbells on iceland */ 2042 if (adev->asic_type != CHIP_TOPAZ) { 2043 ring->use_doorbell = true; 2044 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 2045 } 2046 2047 r = amdgpu_ring_init(adev, ring, 1024, 2048 PACKET3(PACKET3_NOP, 0x3FFF), 0xf, 2049 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP, 2050 AMDGPU_RING_TYPE_GFX); 2051 if (r) 2052 return r; 2053 } 2054 2055 /* set up the compute queues */ 2056 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 2057 unsigned irq_type; 2058 2059 /* max 32 queues per MEC */ 2060 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { 2061 DRM_ERROR("Too many (%d) compute rings!\n", i); 2062 break; 2063 } 2064 ring = &adev->gfx.compute_ring[i]; 2065 ring->ring_obj = NULL; 2066 ring->use_doorbell = true; 2067 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; 2068 ring->me = 1; /* first MEC */ 2069 ring->pipe = i / 8; 2070 ring->queue = i % 8; 2071 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2072 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; 2073 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2074 r = amdgpu_ring_init(adev, ring, 1024, 2075 PACKET3(PACKET3_NOP, 0x3FFF), 0xf, 2076 &adev->gfx.eop_irq, irq_type, 2077 AMDGPU_RING_TYPE_COMPUTE); 2078 if (r) 2079 return r; 2080 } 2081 2082 /* reserve GDS, GWS and OA resource for gfx */ 2083 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, 2084 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, 2085 &adev->gds.gds_gfx_bo, NULL, NULL); 2086 if (r) 2087 return r; 2088 2089 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, 2090 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, 2091 
&adev->gds.gws_gfx_bo, NULL, NULL); 2092 if (r) 2093 return r; 2094 2095 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, 2096 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, 2097 &adev->gds.oa_gfx_bo, NULL, NULL); 2098 if (r) 2099 return r; 2100 2101 adev->gfx.ce_ram_size = 0x8000; 2102 2103 r = gfx_v8_0_gpu_early_init(adev); 2104 if (r) 2105 return r; 2106 2107 return 0; 2108 } 2109 2110 static int gfx_v8_0_sw_fini(void *handle) 2111 { 2112 int i; 2113 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2114 2115 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 2116 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 2117 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 2118 2119 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2120 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2121 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2122 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2123 2124 gfx_v8_0_mec_fini(adev); 2125 gfx_v8_0_rlc_fini(adev); 2126 gfx_v8_0_free_microcode(adev); 2127 2128 return 0; 2129 } 2130 2131 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2132 { 2133 uint32_t *modearray, *mod2array; 2134 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2135 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2136 u32 reg_offset; 2137 2138 modearray = adev->gfx.config.tile_mode_array; 2139 mod2array = adev->gfx.config.macrotile_mode_array; 2140 2141 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2142 modearray[reg_offset] = 0; 2143 2144 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2145 mod2array[reg_offset] = 0; 2146 2147 switch (adev->asic_type) { 2148 case CHIP_TOPAZ: 2149 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2150 PIPE_CONFIG(ADDR_SURF_P2) | 2151 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2153 modearray[1] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2154 PIPE_CONFIG(ADDR_SURF_P2) | 2155 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2156 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2157 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2158 PIPE_CONFIG(ADDR_SURF_P2) | 2159 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2160 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2161 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2162 PIPE_CONFIG(ADDR_SURF_P2) | 2163 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2165 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2166 PIPE_CONFIG(ADDR_SURF_P2) | 2167 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2168 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2169 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2170 PIPE_CONFIG(ADDR_SURF_P2) | 2171 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2172 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2173 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2174 PIPE_CONFIG(ADDR_SURF_P2) | 2175 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2177 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2178 PIPE_CONFIG(ADDR_SURF_P2)); 2179 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2180 PIPE_CONFIG(ADDR_SURF_P2) | 2181 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2182 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2183 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2184 PIPE_CONFIG(ADDR_SURF_P2) | 2185 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2187 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2188 PIPE_CONFIG(ADDR_SURF_P2) | 2189 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2191 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2192 PIPE_CONFIG(ADDR_SURF_P2) | 2193 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2195 
modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2196 PIPE_CONFIG(ADDR_SURF_P2) | 2197 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2199 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2200 PIPE_CONFIG(ADDR_SURF_P2) | 2201 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2203 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2204 PIPE_CONFIG(ADDR_SURF_P2) | 2205 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2207 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2208 PIPE_CONFIG(ADDR_SURF_P2) | 2209 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2211 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2212 PIPE_CONFIG(ADDR_SURF_P2) | 2213 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2215 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2216 PIPE_CONFIG(ADDR_SURF_P2) | 2217 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2218 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2219 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2220 PIPE_CONFIG(ADDR_SURF_P2) | 2221 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2222 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2223 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2224 PIPE_CONFIG(ADDR_SURF_P2) | 2225 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2226 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2227 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2228 PIPE_CONFIG(ADDR_SURF_P2) | 2229 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2231 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2232 PIPE_CONFIG(ADDR_SURF_P2) | 2233 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2234 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2235 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2236 
PIPE_CONFIG(ADDR_SURF_P2) | 2237 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2238 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2239 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2240 PIPE_CONFIG(ADDR_SURF_P2) | 2241 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2243 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2244 PIPE_CONFIG(ADDR_SURF_P2) | 2245 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2247 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2248 PIPE_CONFIG(ADDR_SURF_P2) | 2249 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2251 2252 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2253 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2254 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2255 NUM_BANKS(ADDR_SURF_8_BANK)); 2256 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2257 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2258 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2259 NUM_BANKS(ADDR_SURF_8_BANK)); 2260 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2263 NUM_BANKS(ADDR_SURF_8_BANK)); 2264 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2265 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2266 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2267 NUM_BANKS(ADDR_SURF_8_BANK)); 2268 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2269 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2270 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2271 NUM_BANKS(ADDR_SURF_8_BANK)); 2272 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2273 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2274 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2275 NUM_BANKS(ADDR_SURF_8_BANK)); 2276 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2277 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2278 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2279 NUM_BANKS(ADDR_SURF_8_BANK)); 2280 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2281 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2282 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2283 NUM_BANKS(ADDR_SURF_16_BANK)); 2284 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2285 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2286 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2287 NUM_BANKS(ADDR_SURF_16_BANK)); 2288 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2289 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2290 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2291 NUM_BANKS(ADDR_SURF_16_BANK)); 2292 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2293 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2294 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2295 NUM_BANKS(ADDR_SURF_16_BANK)); 2296 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2297 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2298 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2299 NUM_BANKS(ADDR_SURF_16_BANK)); 2300 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2301 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2302 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2303 NUM_BANKS(ADDR_SURF_16_BANK)); 2304 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2305 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2306 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2307 NUM_BANKS(ADDR_SURF_8_BANK)); 2308 2309 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2310 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2311 reg_offset != 23) 2312 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2313 2314 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2315 if (reg_offset != 7) 2316 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2317 2318 break; 2319 case CHIP_FIJI: 2320 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2321 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2322 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2323 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2324 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2325 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2326 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2327 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2328 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2329 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2330 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2331 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2332 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2333 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2334 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2335 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2336 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2337 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2338 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2339 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2340 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2341 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2342 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2343 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2344 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2345 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2346 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2347 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2348 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2349 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2350 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2351 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2352 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2353 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2354 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2355 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2356 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2358 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2359 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2360 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2361 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2362 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2363 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2364 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2365 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2366 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2367 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2368 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2370 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2371 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2372 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2374 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2375 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2376 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2378 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2379 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2380 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2381 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2382 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2383 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2384 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2386 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2387 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2388 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2389 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2390 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2391 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2392 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2393 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2394 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2395 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2396 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2397 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2398 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2399 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2400 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2401 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2402 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2403 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2404 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2405 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2406 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2407 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2408 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2409 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2410 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2411 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2412 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2413 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2414 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2415 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2416 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2417 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2418 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2419 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2420 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2421 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2422 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2423 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2424 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2425 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2426 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2427 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2428 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2429 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2430 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2432 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2434 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2435 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2436 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2438 modearray[30] = 
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2439 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2440 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2442 2443 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2444 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2445 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2446 NUM_BANKS(ADDR_SURF_8_BANK)); 2447 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2448 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2449 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2450 NUM_BANKS(ADDR_SURF_8_BANK)); 2451 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2454 NUM_BANKS(ADDR_SURF_8_BANK)); 2455 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2456 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2457 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2458 NUM_BANKS(ADDR_SURF_8_BANK)); 2459 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2460 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2461 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2462 NUM_BANKS(ADDR_SURF_8_BANK)); 2463 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2466 NUM_BANKS(ADDR_SURF_8_BANK)); 2467 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2468 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2469 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2470 NUM_BANKS(ADDR_SURF_8_BANK)); 2471 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2472 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2473 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2474 NUM_BANKS(ADDR_SURF_8_BANK)); 2475 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2478 NUM_BANKS(ADDR_SURF_8_BANK)); 2479 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2480 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2481 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2482 NUM_BANKS(ADDR_SURF_8_BANK)); 2483 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2484 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2485 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2486 NUM_BANKS(ADDR_SURF_8_BANK)); 2487 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2488 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2489 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2490 NUM_BANKS(ADDR_SURF_8_BANK)); 2491 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2492 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2493 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2494 NUM_BANKS(ADDR_SURF_8_BANK)); 2495 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2496 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2497 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2498 NUM_BANKS(ADDR_SURF_4_BANK)); 2499 2500 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2501 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2502 2503 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2504 if (reg_offset != 7) 2505 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2506 2507 break; 2508 case CHIP_TONGA: 2509 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2510 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2511 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2512 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2513 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2514 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2515 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2516 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2517 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2518 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2519 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2520 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2521 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2522 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2523 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2524 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2525 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2526 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2527 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2528 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2529 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2530 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2531 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2532 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2533 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2534 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2535 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2536 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2537 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2538 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2539 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2540 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2541 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2542 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2543 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2544 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2545 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2546 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2547 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2548 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2549 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2550 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2551 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2552 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2553 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2554 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2555 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2556 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2557 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2558 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2559 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2560 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2561 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2562 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2563 
modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2564 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2565 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2566 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2567 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2568 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2569 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2570 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2571 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2572 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2573 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2574 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2575 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2576 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2577 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2578 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2579 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2580 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2581 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2582 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2583 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2584 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2585 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2586 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2587 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2588 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2589 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2590 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2591 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2592 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2593 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2594 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2595 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2596 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2597 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2598 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2599 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2600 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2601 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2602 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2603 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2604 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2605 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2606 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2607 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2608 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2609 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2610 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2611 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2612 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2613 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2614 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2615 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2616 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2617 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2618 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2619 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2621 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2622 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2623 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2625 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2626 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2627 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2628 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2629 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2630 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2631 2632 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2633 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2634 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2635 NUM_BANKS(ADDR_SURF_16_BANK)); 2636 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2637 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2638 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2639 NUM_BANKS(ADDR_SURF_16_BANK)); 2640 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2641 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2642 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2643 NUM_BANKS(ADDR_SURF_16_BANK)); 2644 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2645 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2646 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2647 NUM_BANKS(ADDR_SURF_16_BANK)); 2648 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2649 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2650 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2651 NUM_BANKS(ADDR_SURF_16_BANK)); 2652 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2653 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2654 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2655 NUM_BANKS(ADDR_SURF_16_BANK)); 2656 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2657 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2658 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2659 NUM_BANKS(ADDR_SURF_16_BANK)); 2660 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2661 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2662 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2663 NUM_BANKS(ADDR_SURF_16_BANK)); 2664 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2665 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2666 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2667 NUM_BANKS(ADDR_SURF_16_BANK)); 2668 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2671 NUM_BANKS(ADDR_SURF_16_BANK)); 2672 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2673 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2674 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2675 NUM_BANKS(ADDR_SURF_16_BANK)); 2676 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2677 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2678 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2679 NUM_BANKS(ADDR_SURF_8_BANK)); 2680 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2681 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2682 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2683 NUM_BANKS(ADDR_SURF_4_BANK)); 2684 mod2array[14] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2685 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2686 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2687 NUM_BANKS(ADDR_SURF_4_BANK)); 2688 2689 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2690 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2691 2692 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2693 if (reg_offset != 7) 2694 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2695 2696 break; 2697 case CHIP_POLARIS11: 2698 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2699 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2700 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2701 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2702 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2704 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2705 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2706 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2707 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2708 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2709 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2710 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2711 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2712 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2713 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2714 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2715 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2716 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2717 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2718 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2720 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2721 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2722 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2723 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2724 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2725 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2726 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2727 
PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2728 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2729 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2730 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2731 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2732 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2733 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2734 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2735 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2736 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2737 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2738 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2739 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2740 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2741 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2742 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2743 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2744 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2745 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2746 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2747 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2748 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2749 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2750 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2751 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2752 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2753 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2754 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2755 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2756 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2757 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2758 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2759 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2760 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2761 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2762 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2763 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2764 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2765 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2766 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2767 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2768 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2769 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2770 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2771 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2772 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2773 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2774 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2775 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2776 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2777 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2778 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2779 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2780 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2781 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2782 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2783 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2784 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2785 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2786 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2787 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2788 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2789 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2790 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2791 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2792 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2793 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2794 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2795 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2796 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2797 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2798 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2799 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2800 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2801 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2802 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2803 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2804 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2805 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2806 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2807 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2808 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2809 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2810 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2811 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2812 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2813 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2814 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2815 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2816 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2817 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2818 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2819 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2820 2821 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2822 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2823 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2824 NUM_BANKS(ADDR_SURF_16_BANK)); 2825 2826 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2827 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2828 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2829 NUM_BANKS(ADDR_SURF_16_BANK)); 2830 2831 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2832 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2833 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2834 NUM_BANKS(ADDR_SURF_16_BANK)); 2835 2836 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2837 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2838 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2839 NUM_BANKS(ADDR_SURF_16_BANK)); 2840 2841 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2842 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2843 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2844 NUM_BANKS(ADDR_SURF_16_BANK)); 2845 2846 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2847 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2848 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2849 NUM_BANKS(ADDR_SURF_16_BANK)); 2850 2851 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2852 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2853 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2854 
NUM_BANKS(ADDR_SURF_16_BANK)); 2855 2856 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2857 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2858 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2859 NUM_BANKS(ADDR_SURF_16_BANK)); 2860 2861 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2862 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2863 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2864 NUM_BANKS(ADDR_SURF_16_BANK)); 2865 2866 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2867 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2868 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2869 NUM_BANKS(ADDR_SURF_16_BANK)); 2870 2871 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2872 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2873 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2874 NUM_BANKS(ADDR_SURF_16_BANK)); 2875 2876 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2877 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2878 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2879 NUM_BANKS(ADDR_SURF_16_BANK)); 2880 2881 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2882 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2883 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2884 NUM_BANKS(ADDR_SURF_8_BANK)); 2885 2886 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2887 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2888 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2889 NUM_BANKS(ADDR_SURF_4_BANK)); 2890 2891 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2892 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2893 2894 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2895 if (reg_offset != 7) 2896 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2897 2898 break; 2899 case CHIP_POLARIS10: 2900 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2901 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2902 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2903 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2904 modearray[1] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2905 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2906 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2907 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2908 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2909 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2910 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2911 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2912 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2913 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2914 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2915 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2916 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2917 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2918 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2919 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2920 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2921 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2922 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2923 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2924 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2925 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2926 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2927 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2928 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2929 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2930 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2931 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2932 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2933 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2934 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2935 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2936 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2937 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2938 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2939 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2940 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2941 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2942 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2943 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2944 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2945 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2946 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2947 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2948 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2949 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2950 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2951 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2952 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2953 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2954 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2955 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2956 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2957 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2958 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2959 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2960 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2961 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2962 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2963 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2964 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2965 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2966 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2967 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2968 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2969 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2970 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2971 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2972 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2973 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2974 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2975 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2976 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2977 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2978 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2980 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2981 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2982 modearray[21] = 
(ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2983 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2984 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2985 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2986 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2987 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2988 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2990 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2991 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2992 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2993 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2994 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2995 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2996 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2997 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2998 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2999 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3000 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3001 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3002 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3003 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3004 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3005 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3006 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3007 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3008 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3009 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3010 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3011 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3012 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3013 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3014 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3015 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3016 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3017 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3018 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3019 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3020 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3021 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3022 3023 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3024 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3025 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3026 NUM_BANKS(ADDR_SURF_16_BANK)); 3027 3028 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3029 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3030 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3031 NUM_BANKS(ADDR_SURF_16_BANK)); 3032 3033 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3034 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3035 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3036 NUM_BANKS(ADDR_SURF_16_BANK)); 3037 3038 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3039 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3040 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3041 NUM_BANKS(ADDR_SURF_16_BANK)); 3042 3043 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3044 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3045 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3046 NUM_BANKS(ADDR_SURF_16_BANK)); 3047 3048 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3049 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3050 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3051 NUM_BANKS(ADDR_SURF_16_BANK)); 3052 3053 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3054 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3055 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3056 NUM_BANKS(ADDR_SURF_16_BANK)); 3057 3058 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3059 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3060 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3061 NUM_BANKS(ADDR_SURF_16_BANK)); 3062 3063 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3064 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3065 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3066 NUM_BANKS(ADDR_SURF_16_BANK)); 3067 3068 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3069 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3070 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3071 NUM_BANKS(ADDR_SURF_16_BANK)); 3072 3073 
mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3074 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3075 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3076 NUM_BANKS(ADDR_SURF_16_BANK)); 3077 3078 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3079 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3080 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3081 NUM_BANKS(ADDR_SURF_8_BANK)); 3082 3083 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3084 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3085 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3086 NUM_BANKS(ADDR_SURF_4_BANK)); 3087 3088 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3089 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3090 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3091 NUM_BANKS(ADDR_SURF_4_BANK)); 3092 3093 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3094 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3095 3096 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3097 if (reg_offset != 7) 3098 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3099 3100 break; 3101 case CHIP_STONEY: 3102 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3103 PIPE_CONFIG(ADDR_SURF_P2) | 3104 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3105 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3106 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3107 PIPE_CONFIG(ADDR_SURF_P2) | 3108 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3109 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3110 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3111 PIPE_CONFIG(ADDR_SURF_P2) | 3112 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3113 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3114 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3115 PIPE_CONFIG(ADDR_SURF_P2) | 3116 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3117 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3118 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3119 PIPE_CONFIG(ADDR_SURF_P2) | 3120 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3121 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3122 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3123 PIPE_CONFIG(ADDR_SURF_P2) | 3124 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3125 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3126 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3127 PIPE_CONFIG(ADDR_SURF_P2) | 3128 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3129 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3130 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3131 PIPE_CONFIG(ADDR_SURF_P2)); 3132 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3133 PIPE_CONFIG(ADDR_SURF_P2) | 3134 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3135 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3136 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3137 PIPE_CONFIG(ADDR_SURF_P2) | 3138 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3139 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3140 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3141 PIPE_CONFIG(ADDR_SURF_P2) | 3142 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3143 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3144 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3145 PIPE_CONFIG(ADDR_SURF_P2) | 3146 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3147 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3148 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3149 PIPE_CONFIG(ADDR_SURF_P2) | 3150 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3151 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3152 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3153 PIPE_CONFIG(ADDR_SURF_P2) | 3154 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3155 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3156 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3157 PIPE_CONFIG(ADDR_SURF_P2) | 3158 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3159 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3160 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3161 
PIPE_CONFIG(ADDR_SURF_P2) | 3162 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3163 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3164 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3165 PIPE_CONFIG(ADDR_SURF_P2) | 3166 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3167 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3168 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3169 PIPE_CONFIG(ADDR_SURF_P2) | 3170 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3171 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3172 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3173 PIPE_CONFIG(ADDR_SURF_P2) | 3174 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3175 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3176 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3177 PIPE_CONFIG(ADDR_SURF_P2) | 3178 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3179 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3180 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3181 PIPE_CONFIG(ADDR_SURF_P2) | 3182 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3184 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3185 PIPE_CONFIG(ADDR_SURF_P2) | 3186 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3187 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3188 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3189 PIPE_CONFIG(ADDR_SURF_P2) | 3190 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3191 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3192 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3193 PIPE_CONFIG(ADDR_SURF_P2) | 3194 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3196 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3197 PIPE_CONFIG(ADDR_SURF_P2) | 3198 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3199 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3200 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3201 PIPE_CONFIG(ADDR_SURF_P2) | 3202 
MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3204 3205 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3206 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3207 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3208 NUM_BANKS(ADDR_SURF_8_BANK)); 3209 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3210 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3211 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3212 NUM_BANKS(ADDR_SURF_8_BANK)); 3213 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3214 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3215 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3216 NUM_BANKS(ADDR_SURF_8_BANK)); 3217 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3218 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3219 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3220 NUM_BANKS(ADDR_SURF_8_BANK)); 3221 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3222 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3223 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3224 NUM_BANKS(ADDR_SURF_8_BANK)); 3225 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3226 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3227 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3228 NUM_BANKS(ADDR_SURF_8_BANK)); 3229 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3230 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3231 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3232 NUM_BANKS(ADDR_SURF_8_BANK)); 3233 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3234 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3235 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3236 NUM_BANKS(ADDR_SURF_16_BANK)); 3237 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3238 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3239 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3240 NUM_BANKS(ADDR_SURF_16_BANK)); 3241 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3242 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3243 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3244 NUM_BANKS(ADDR_SURF_16_BANK)); 3245 
mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3246 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3247 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3248 NUM_BANKS(ADDR_SURF_16_BANK)); 3249 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3250 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3251 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3252 NUM_BANKS(ADDR_SURF_16_BANK)); 3253 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3256 NUM_BANKS(ADDR_SURF_16_BANK)); 3257 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3258 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3259 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3260 NUM_BANKS(ADDR_SURF_8_BANK)); 3261 3262 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3263 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3264 reg_offset != 23) 3265 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3266 3267 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3268 if (reg_offset != 7) 3269 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3270 3271 break; 3272 default: 3273 dev_warn(adev->dev, 3274 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3275 adev->asic_type); 3276 3277 case CHIP_CARRIZO: 3278 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3279 PIPE_CONFIG(ADDR_SURF_P2) | 3280 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3281 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3282 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3283 PIPE_CONFIG(ADDR_SURF_P2) | 3284 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3285 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3286 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3287 PIPE_CONFIG(ADDR_SURF_P2) | 3288 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3289 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3290 modearray[3] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3291 PIPE_CONFIG(ADDR_SURF_P2) | 3292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3293 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3294 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3295 PIPE_CONFIG(ADDR_SURF_P2) | 3296 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3297 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3298 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3299 PIPE_CONFIG(ADDR_SURF_P2) | 3300 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3301 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3302 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3303 PIPE_CONFIG(ADDR_SURF_P2) | 3304 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3305 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3306 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3307 PIPE_CONFIG(ADDR_SURF_P2)); 3308 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3309 PIPE_CONFIG(ADDR_SURF_P2) | 3310 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3311 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3312 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3313 PIPE_CONFIG(ADDR_SURF_P2) | 3314 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3315 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3316 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3317 PIPE_CONFIG(ADDR_SURF_P2) | 3318 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3320 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3321 PIPE_CONFIG(ADDR_SURF_P2) | 3322 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3323 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3324 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3325 PIPE_CONFIG(ADDR_SURF_P2) | 3326 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3327 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3328 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3329 PIPE_CONFIG(ADDR_SURF_P2) | 3330 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3332 
modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3333 PIPE_CONFIG(ADDR_SURF_P2) | 3334 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3335 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3336 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3337 PIPE_CONFIG(ADDR_SURF_P2) | 3338 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3339 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3340 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3341 PIPE_CONFIG(ADDR_SURF_P2) | 3342 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3344 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3345 PIPE_CONFIG(ADDR_SURF_P2) | 3346 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3347 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3348 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3349 PIPE_CONFIG(ADDR_SURF_P2) | 3350 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3351 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3352 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3353 PIPE_CONFIG(ADDR_SURF_P2) | 3354 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3356 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3357 PIPE_CONFIG(ADDR_SURF_P2) | 3358 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3360 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3361 PIPE_CONFIG(ADDR_SURF_P2) | 3362 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3364 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3365 PIPE_CONFIG(ADDR_SURF_P2) | 3366 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3368 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3369 PIPE_CONFIG(ADDR_SURF_P2) | 3370 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3372 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3373 
PIPE_CONFIG(ADDR_SURF_P2) | 3374 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3376 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3377 PIPE_CONFIG(ADDR_SURF_P2) | 3378 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3380 3381 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3382 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3383 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3384 NUM_BANKS(ADDR_SURF_8_BANK)); 3385 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3386 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3387 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3388 NUM_BANKS(ADDR_SURF_8_BANK)); 3389 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3390 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3391 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3392 NUM_BANKS(ADDR_SURF_8_BANK)); 3393 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3394 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3395 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3396 NUM_BANKS(ADDR_SURF_8_BANK)); 3397 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3398 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3399 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3400 NUM_BANKS(ADDR_SURF_8_BANK)); 3401 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3402 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3403 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3404 NUM_BANKS(ADDR_SURF_8_BANK)); 3405 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3406 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3407 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3408 NUM_BANKS(ADDR_SURF_8_BANK)); 3409 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3412 NUM_BANKS(ADDR_SURF_16_BANK)); 3413 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3414 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3415 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3416 
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		/* Program the primary tile-mode table; offsets 7, 12, 17 and
		 * 23 are reserved on this ASIC and must not be written.
		 */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		/* Program the macrotile table; offset 7 is reserved. */
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	}
}

/*
 * gfx_v8_0_select_se_sh - steer subsequent indexed register accesses
 *
 * @adev: amdgpu device pointer
 * @se_num: shader engine index, or 0xffffffff to broadcast to all SEs
 * @sh_num: shader array index, or 0xffffffff to broadcast to all SHs
 * @instance: CU/SIMD instance index, or 0xffffffff to broadcast
 *
 * Writes GRBM_GFX_INDEX so that following per-SE/SH/instance register
 * reads and writes target the requested unit (or all of them when the
 * 0xffffffff broadcast sentinel is passed).  Callers are expected to
 * hold adev->grbm_idx_mutex around the select/access/deselect sequence.
 */
static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
				  u32 se_num, u32 sh_num, u32 instance)
{
	u32 data;

	if (instance == 0xffffffff)
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

	if (se_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

	if (sh_num == 0xffffffff)
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}

/*
 * gfx_v8_0_create_bitmask - build a mask of @bit_width consecutive 1 bits
 *
 * Uses a 64-bit intermediate so bit_width == 32 does not invoke
 * undefined behavior from a full-width shift.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	return (u32)((1ULL << bit_width) - 1);
}

/*
 * gfx_v8_0_get_rb_active_bitmap - bitmap of enabled render backends
 *
 * Returns a bitmask of the RBs that are NOT disabled (by fuses or by
 * user configuration) for the SE/SH currently selected via
 * GRBM_GFX_INDEX.  ORs the hardware (CC) and user (GC_USER) disable
 * registers, then inverts within the per-SH backend width.
 */
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	/* disable bits set -> active bits clear, so invert then mask */
	return (~data) & mask;
}

/*
 * gfx_v8_0_raster_config - per-ASIC golden PA_SC_RASTER_CONFIG values
 *
 * @adev: amdgpu device pointer
 * @rconf: ORed with the RASTER_CONFIG value for this chip
 * @rconf1: ORed with the RASTER_CONFIG_1 value for this chip
 *
 * The values encode the RB/packer/SE mapping for a fully-enabled part;
 * harvested parts are reprogrammed afterwards by
 * gfx_v8_0_write_harvested_raster_configs().
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}

/*
 * gfx_v8_0_write_harvested_raster_configs - reprogram rasterizer mapping
 * for parts with harvested (fused-off) render backends
 *
 * @adev: amdgpu device pointer
 * @raster_config: golden PA_SC_RASTER_CONFIG value for the full part
 * @raster_config_1: golden PA_SC_RASTER_CONFIG_1 value for the full part
 * @rb_mask: bitmap of RBs that are actually enabled
 * @num_rb: number of RBs on the non-harvested design
 *
 * Rewrites the SE/packer/RB map fields so work is only routed to live
 * backends, then writes the per-SE config via GRBM_GFX_INDEX selection.
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	/* clamp to at least 1 to avoid divide-by-zero on odd configs */
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* slice rb_mask into per-shader-engine sub-masks */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* the remap logic below only handles these topologies */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* if one SE pair is fully harvested, route its work to the other */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* index of this SE's pair partner base */

		/* if one SE of a pair is dead, remap SE_MAP to the live one */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		/* same idea one level down: remap dead packers */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* and one level further: remap dead RBs within each packer */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}

/*
 * gfx_v8_0_setup_rb - discover active render backends and program the
 * rasterizer configuration accordingly
 *
 * Walks every SE/SH, collects the active-RB bitmap, then either writes
 * the golden raster config (no harvesting) or the harvested variant.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			/* pack each SH's bitmap into one global bitmap */
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* golden config is fine when nothing (relevant) is harvested;
	 * otherwise remap work away from the dead backends
	 */
	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - init the SH_MEM registers of the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM config/base registers for VMIDs 8..15, which
 * are reserved for compute (HSA) use, setting up the 64-bit HSA
 * aperture layout described below.
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		/* select the per-VMID register bank via SRBM */
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 disabled: base > limit */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

/*
 * gfx_v8_0_gpu_init - one-time GFX block setup at hw init
 *
 * Programs address config, tiling tables, RB config, per-VMID SH_MEM
 * registers (VMID 0 uses cached MTYPE, others non-coherent), the
 * compute VMIDs, and the PA_SC FIFO sizes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel contexts): cache-coherent default mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		/* APE1 disabled: base > limit */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}

/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes links to go idle
 *
 * Polls the per-CU master-busy register for every SE/SH (bounded by
 * adev->usec_timeout per unit), then polls the non-CU masters (SE, GC,
 * TC0/TC1).  Times out silently; callers proceed regardless.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

/*
 * gfx_v8_0_enable_gui_idle_interrupt - gate the GUI idle/busy interrupts
 *
 * Enables or disables the context-busy/empty, compute-busy and
 * gfx-idle interrupt sources on ring 0 in one read-modify-write.
 */
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

/*
 * gfx_v8_0_init_csb - point the RLC at the clear-state buffer
 *
 * Programs the clear-state indirect buffer (CSIB) GPU address and size
 * from the previously-allocated rlc.clear_state buffer.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}

/*
 * gfx_v8_0_parse_ind_reg_list - parse the RLC indirect register list
 *
 * @register_list_format: firmware-provided list; entries holding an
 *	index are rewritten IN PLACE to the position of that index in
 *	@unique_indices
 * @ind_offset: offset of the first indirect entry in the list
 * @list_size: total number of dwords in the list
 * @unique_indices: out - deduplicated index values found in the list
 * @indices_count: in/out - number of entries used in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: out - start offset of each sub-list
 * @offset_count: in/out - number of entries used in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * Sub-lists are terminated by a 0xFFFFFFFF sentinel.  Within a
 * sub-list, entries come in groups of three where the third dword is
 * an index value; those are collected (deduplicated) and replaced by
 * their slot number.  BUG_ON fires if the firmware list overflows the
 * fixed-size output arrays.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* sentinel: next dword starts a new sub-list */
			new_entry = true;
			continue;
		}

		/* skip the two register dwords to reach the index dword */
		ind_offset += 2;

		/* look for the matching index */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* not seen before - record it */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw index value by its slot number */
		register_list_format[ind_offset] = indices;
	}
}

/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists
 *
 * Copies the firmware register-list-format blob, parses its indirect
 * entries, then programs the SRM ARAM (direct list), the GPM scratch
 * (indirect list, size and starting offsets) and the index control
 * registers.  Returns 0 on success or -ENOMEM.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (register_list_format == NULL)
		return -ENOMEM;
	/* work on a copy: parsing rewrites index entries in place */
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size is stored in units of dword pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		/* low 18 bits -> ADDR regs, high bits -> DATA regs */
		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
	}
	kfree(register_list_format);

	return 0;
}

/* Turn on the RLC save/restore machine (SRM). */
static void
gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

/*
 * gfx_v8_0_init_power_gating - program GFX power-gating delays
 *
 * Only takes effect when static, smart or dynamic GFX PG is enabled in
 * adev->pg_flags.  The delay values are hardware tuning constants.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
	}
}

/* Carrizo/Stoney: SMU clock slowdown while powering up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

/* Carrizo/Stoney: SMU clock slowdown while powering down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

/* Note: the field is CP_PG_DISABLE, so enable==true DISABLES CP PG. */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
}

/*
 * gfx_v8_0_init_pg - power-gating bring-up
 *
 * If any PG feature is enabled: set up the clear-state buffer and the
 * save/restore machine, then do the APU-specific (Carrizo/Stoney) or
 * Polaris11 power-gating configuration.
 */
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);

		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_STONEY)) {
			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
			gfx_v8_0_init_power_gating(adev);
			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
				cz_enable_sck_slow_down_on_power_up(adev, true);
				cz_enable_sck_slow_down_on_power_down(adev, true);
			} else {
				cz_enable_sck_slow_down_on_power_up(adev, false);
				cz_enable_sck_slow_down_on_power_down(adev, false);
			}
			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
				cz_enable_cp_power_gating(adev, true);
			else
				cz_enable_cp_power_gating(adev, false);
		} else if (adev->asic_type == CHIP_POLARIS11) {
			gfx_v8_0_init_power_gating(adev);
		}
	}
}

/*
 * gfx_v8_0_rlc_stop - halt the RLC microengine
 *
 * Disables the F32 core, masks the GUI idle interrupts and waits for
 * the serdes links to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

/* Pulse the RLC soft reset with settle delays on both edges. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

/*
 * gfx_v8_0_rlc_start - start the RLC microengine
 *
 * On APUs the GUI idle interrupt is enabled later, after the CP is
 * initialized, so it is only switched on here for dGPUs.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}

/*
 * gfx_v8_0_rlc_load_microcode - upload RLC firmware via MMIO
 *
 * Legacy (non-SMU) load path: streams the ucode words into
 * RLC_GPM_UCODE_DATA with auto-incrementing address, then writes the
 * firmware version as the final ADDR value.  Returns -EINVAL if the
 * RLC firmware was never fetched.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

/*
 * gfx_v8_0_rlc_resume - full RLC restart sequence
 *
 * Stops the RLC, disables coarse/3D clock gating and power gating,
 * soft-resets the engine, re-initializes power gating, loads firmware
 * (legacy MMIO path or waits on the SMU loader) and starts the RLC.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		/* Polaris also has a 3D CGCG/CGLS control to clear */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the fw; just wait until it is done */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

/*
 * gfx_v8_0_cp_gfx_enable - halt or release the gfx CP micro-engines
 *
 * Toggles the ME/PFP/CE halt bits together; when halting, also marks
 * every gfx ring not-ready so no further submissions are attempted.
 */
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}

/*
 * gfx_v8_0_cp_gfx_load_microcode - upload PFP, CE and ME firmware
 *
 * Halts the gfx CP first, then streams each image into its UCODE_DATA
 * register with the address register reset to 0 and finally set to the
 * firmware version.  Returns -EINVAL when any image is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

/*
 * gfx_v8_0_get_csb_size - dword count of the clear-state PM4 stream
 *
 * Mirrors the packet layout emitted by gfx_v8_0_cp_gfx_start(); the
 * two must be kept in sync.  Returns 0 if a non-SECT_CONTEXT section
 * is encountered (unsupported layout).
 */
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

/*
 * gfx_v8_0_cp_gfx_start - bring up the gfx CP and emit clear state
 *
 * Programs the CP context limits, un-halts the engines and pushes the
 * clear-state + CE-partition init packet stream onto gfx ring 0.
 * Returns 0 on success or the ring-allocation error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the trailing SET_BASE packet for the CE partitions */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring,
PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4258 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4259 4260 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4261 amdgpu_ring_write(ring, 0x80000000); 4262 amdgpu_ring_write(ring, 0x80000000); 4263 4264 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4265 for (ext = sect->section; ext->extent != NULL; ++ext) { 4266 if (sect->id == SECT_CONTEXT) { 4267 amdgpu_ring_write(ring, 4268 PACKET3(PACKET3_SET_CONTEXT_REG, 4269 ext->reg_count)); 4270 amdgpu_ring_write(ring, 4271 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4272 for (i = 0; i < ext->reg_count; i++) 4273 amdgpu_ring_write(ring, ext->extent[i]); 4274 } 4275 } 4276 } 4277 4278 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4279 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4280 switch (adev->asic_type) { 4281 case CHIP_TONGA: 4282 case CHIP_POLARIS10: 4283 amdgpu_ring_write(ring, 0x16000012); 4284 amdgpu_ring_write(ring, 0x0000002A); 4285 break; 4286 case CHIP_POLARIS11: 4287 amdgpu_ring_write(ring, 0x16000012); 4288 amdgpu_ring_write(ring, 0x00000000); 4289 break; 4290 case CHIP_FIJI: 4291 amdgpu_ring_write(ring, 0x3a00161a); 4292 amdgpu_ring_write(ring, 0x0000002e); 4293 break; 4294 case CHIP_CARRIZO: 4295 amdgpu_ring_write(ring, 0x00000002); 4296 amdgpu_ring_write(ring, 0x00000000); 4297 break; 4298 case CHIP_TOPAZ: 4299 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ? 
4300 0x00000000 : 0x00000002); 4301 amdgpu_ring_write(ring, 0x00000000); 4302 break; 4303 case CHIP_STONEY: 4304 amdgpu_ring_write(ring, 0x00000000); 4305 amdgpu_ring_write(ring, 0x00000000); 4306 break; 4307 default: 4308 BUG(); 4309 } 4310 4311 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4312 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4313 4314 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4315 amdgpu_ring_write(ring, 0); 4316 4317 /* init the CE partitions */ 4318 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4319 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4320 amdgpu_ring_write(ring, 0x8000); 4321 amdgpu_ring_write(ring, 0x8000); 4322 4323 amdgpu_ring_commit(ring); 4324 4325 return 0; 4326 } 4327 4328 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) 4329 { 4330 struct amdgpu_ring *ring; 4331 u32 tmp; 4332 u32 rb_bufsz; 4333 u64 rb_addr, rptr_addr; 4334 int r; 4335 4336 /* Set the write pointer delay */ 4337 WREG32(mmCP_RB_WPTR_DELAY, 0); 4338 4339 /* set the RB to use vmid 0 */ 4340 WREG32(mmCP_RB_VMID, 0); 4341 4342 /* Set ring buffer size */ 4343 ring = &adev->gfx.gfx_ring[0]; 4344 rb_bufsz = order_base_2(ring->ring_size / 8); 4345 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 4346 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 4347 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3); 4348 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1); 4349 #ifdef __BIG_ENDIAN 4350 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 4351 #endif 4352 WREG32(mmCP_RB0_CNTL, tmp); 4353 4354 /* Initialize the ring buffer's read and write pointers */ 4355 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); 4356 ring->wptr = 0; 4357 WREG32(mmCP_RB0_WPTR, ring->wptr); 4358 4359 /* set the wb address wether it's enabled or not */ 4360 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4361 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 4362 
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); 4363 4364 mdelay(1); 4365 WREG32(mmCP_RB0_CNTL, tmp); 4366 4367 rb_addr = ring->gpu_addr >> 8; 4368 WREG32(mmCP_RB0_BASE, rb_addr); 4369 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4370 4371 /* no gfx doorbells on iceland */ 4372 if (adev->asic_type != CHIP_TOPAZ) { 4373 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); 4374 if (ring->use_doorbell) { 4375 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4376 DOORBELL_OFFSET, ring->doorbell_index); 4377 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4378 DOORBELL_HIT, 0); 4379 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4380 DOORBELL_EN, 1); 4381 } else { 4382 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4383 DOORBELL_EN, 0); 4384 } 4385 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); 4386 4387 if (adev->asic_type == CHIP_TONGA) { 4388 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 4389 DOORBELL_RANGE_LOWER, 4390 AMDGPU_DOORBELL_GFX_RING0); 4391 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 4392 4393 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, 4394 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 4395 } 4396 4397 } 4398 4399 /* start the ring */ 4400 gfx_v8_0_cp_gfx_start(adev); 4401 ring->ready = true; 4402 r = amdgpu_ring_test_ring(ring); 4403 if (r) 4404 ring->ready = false; 4405 4406 return r; 4407 } 4408 4409 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4410 { 4411 int i; 4412 4413 if (enable) { 4414 WREG32(mmCP_MEC_CNTL, 0); 4415 } else { 4416 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4417 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4418 adev->gfx.compute_ring[i].ready = false; 4419 } 4420 udelay(50); 4421 } 4422 4423 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4424 { 4425 const struct gfx_firmware_header_v1_0 *mec_hdr; 4426 const __le32 *fw_data; 4427 unsigned i, fw_size; 4428 4429 if (!adev->gfx.mec_fw) 4430 return -EINVAL; 4431 
gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}

/*
 * vi_mqd - CP Memory Queue Descriptor layout for VI (gfx v8) compute queues
 *
 * Mirrors the 256-dword hardware MQD format (one field per hardware
 * ordinal), followed by a 256-dword area reserved for CP ucode use.
 * Field order, names and padding must match the hardware layout
 * exactly - do not reorder or resize any member.
 */
struct vi_mqd {
	uint32_t header;			/* ordinal0 */
	uint32_t compute_dispatch_initiator;	/* ordinal1 */
	uint32_t compute_dim_x;			/* ordinal2 */
	uint32_t compute_dim_y;			/* ordinal3 */
	uint32_t compute_dim_z;			/* ordinal4 */
	uint32_t compute_start_x;		/* ordinal5 */
	uint32_t compute_start_y;		/* ordinal6 */
	uint32_t compute_start_z;		/* ordinal7 */
	uint32_t compute_num_thread_x;		/* ordinal8 */
	uint32_t compute_num_thread_y;		/* ordinal9 */
	uint32_t compute_num_thread_z;		/* ordinal10 */
	uint32_t compute_pipelinestat_enable;	/* ordinal11 */
	uint32_t compute_perfcount_enable;	/* ordinal12 */
	uint32_t compute_pgm_lo;		/* ordinal13 */
	uint32_t compute_pgm_hi;		/* ordinal14 */
	uint32_t compute_tba_lo;		/* ordinal15 */
	uint32_t compute_tba_hi;		/* ordinal16 */
	uint32_t compute_tma_lo;		/* ordinal17 */
	uint32_t compute_tma_hi;		/* ordinal18 */
	uint32_t compute_pgm_rsrc1;		/* ordinal19 */
	uint32_t compute_pgm_rsrc2;		/* ordinal20 */
	uint32_t compute_vmid;			/* ordinal21 */
	uint32_t compute_resource_limits;	/* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;	/* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;	/* ordinal24 */
	uint32_t compute_tmpring_size;		/* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;	/* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;	/* ordinal27 */
	uint32_t compute_restart_x;		/* ordinal28 */
	uint32_t compute_restart_y;		/* ordinal29 */
	uint32_t compute_restart_z;		/* ordinal30 */
	uint32_t compute_thread_trace_enable;	/* ordinal31 */
	uint32_t compute_misc_reserved;		/* ordinal32 */
	uint32_t compute_dispatch_id;		/* ordinal33 */
	uint32_t compute_threadgroup_id;	/* ordinal34 */
	uint32_t compute_relaunch;		/* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;	/* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;	/* ordinal37 */
	uint32_t compute_wave_restore_control;	/* ordinal38 */
	uint32_t reserved9;			/* ordinal39 */
	uint32_t reserved10;			/* ordinal40 */
	uint32_t reserved11;			/* ordinal41 */
	uint32_t reserved12;			/* ordinal42 */
	uint32_t reserved13;			/* ordinal43 */
	uint32_t reserved14;			/* ordinal44 */
	uint32_t reserved15;			/* ordinal45 */
	uint32_t reserved16;			/* ordinal46 */
	uint32_t reserved17;			/* ordinal47 */
	uint32_t reserved18;			/* ordinal48 */
	uint32_t reserved19;			/* ordinal49 */
	uint32_t reserved20;			/* ordinal50 */
	uint32_t reserved21;			/* ordinal51 */
	uint32_t reserved22;			/* ordinal52 */
	uint32_t reserved23;			/* ordinal53 */
	uint32_t reserved24;			/* ordinal54 */
	uint32_t reserved25;			/* ordinal55 */
	uint32_t reserved26;			/* ordinal56 */
	uint32_t reserved27;			/* ordinal57 */
	uint32_t reserved28;			/* ordinal58 */
	uint32_t reserved29;			/* ordinal59 */
	uint32_t reserved30;			/* ordinal60 */
	uint32_t reserved31;			/* ordinal61 */
	uint32_t reserved32;			/* ordinal62 */
	uint32_t reserved33;			/* ordinal63 */
	uint32_t reserved34;			/* ordinal64 */
	uint32_t compute_user_data_0;		/* ordinal65 */
	uint32_t compute_user_data_1;		/* ordinal66 */
	uint32_t compute_user_data_2;		/* ordinal67 */
	uint32_t compute_user_data_3;		/* ordinal68 */
	uint32_t compute_user_data_4;		/* ordinal69 */
	uint32_t compute_user_data_5;		/* ordinal70 */
	uint32_t compute_user_data_6;		/* ordinal71 */
	uint32_t compute_user_data_7;		/* ordinal72 */
	uint32_t compute_user_data_8;		/* ordinal73 */
	uint32_t compute_user_data_9;		/* ordinal74 */
	uint32_t compute_user_data_10;		/* ordinal75 */
	uint32_t compute_user_data_11;		/* ordinal76 */
	uint32_t compute_user_data_12;		/* ordinal77 */
	uint32_t compute_user_data_13;		/* ordinal78 */
	uint32_t compute_user_data_14;		/* ordinal79 */
	uint32_t compute_user_data_15;		/* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo;	/* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;	/* ordinal82 */
	uint32_t reserved35;			/* ordinal83 */
	uint32_t reserved36;			/* ordinal84 */
	uint32_t reserved37;			/* ordinal85 */
	uint32_t cp_mqd_query_time_lo;		/* ordinal86 */
	uint32_t cp_mqd_query_time_hi;		/* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;	/* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;	/* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;	/* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;	/* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;	/* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;	/* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;	/* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;	/* ordinal95 */
	uint32_t reserved38;			/* ordinal96 */
	uint32_t reserved39;			/* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;	/* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;	/* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;	/* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;	/* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;	/* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;	/* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;	/* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;	/* ordinal105 */
	uint32_t reserved40;			/* ordinal106 */
	uint32_t reserved41;			/* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0;		/* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;		/* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;		/* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;		/* ordinal111 */
	uint32_t reserved42;			/* ordinal112 */
	uint32_t reserved43;			/* ordinal113 */
	uint32_t cp_pq_exe_status_lo;		/* ordinal114 */
	uint32_t cp_pq_exe_status_hi;		/* ordinal115 */
	uint32_t cp_packet_id_lo;		/* ordinal116 */
	uint32_t cp_packet_id_hi;		/* ordinal117 */
	uint32_t cp_packet_exe_status_lo;	/* ordinal118 */
	uint32_t cp_packet_exe_status_hi;	/* ordinal119 */
	uint32_t gds_save_base_addr_lo;		/* ordinal120 */
	uint32_t gds_save_base_addr_hi;		/* ordinal121 */
	uint32_t gds_save_mask_lo;		/* ordinal122 */
	uint32_t gds_save_mask_hi;		/* ordinal123 */
	uint32_t ctx_save_base_addr_lo;		/* ordinal124 */
	uint32_t ctx_save_base_addr_hi;		/* ordinal125 */
	uint32_t reserved44;			/* ordinal126 */
	uint32_t reserved45;			/* ordinal127 */
	uint32_t cp_mqd_base_addr_lo;		/* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;		/* ordinal129 */
	uint32_t cp_hqd_active;			/* ordinal130 */
	uint32_t cp_hqd_vmid;			/* ordinal131 */
	uint32_t cp_hqd_persistent_state;	/* ordinal132 */
	uint32_t cp_hqd_pipe_priority;		/* ordinal133 */
	uint32_t cp_hqd_queue_priority;		/* ordinal134 */
	uint32_t cp_hqd_quantum;		/* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;		/* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;		/* ordinal137 */
	uint32_t cp_hqd_pq_rptr;		/* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;	/* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;	/* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;	/* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;	/* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;	/* ordinal143 */
	uint32_t cp_hqd_pq_wptr;		/* ordinal144 */
	uint32_t cp_hqd_pq_control;		/* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;	/* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;	/* ordinal147 */
	uint32_t cp_hqd_ib_rptr;		/* ordinal148 */
	uint32_t cp_hqd_ib_control;		/* ordinal149 */
	uint32_t cp_hqd_iq_timer;		/* ordinal150 */
	uint32_t cp_hqd_iq_rptr;		/* ordinal151 */
	uint32_t cp_hqd_dequeue_request;	/* ordinal152 */
	uint32_t cp_hqd_dma_offload;		/* ordinal153 */
	uint32_t cp_hqd_sema_cmd;		/* ordinal154 */
	uint32_t cp_hqd_msg_type;		/* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;	/* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;	/* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;	/* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;	/* ordinal159 */
	uint32_t cp_hqd_hq_status0;		/* ordinal160 */
	uint32_t cp_hqd_hq_control0;		/* ordinal161 */
	uint32_t cp_mqd_control;		/* ordinal162 */
	uint32_t cp_hqd_hq_status1;		/* ordinal163 */
	uint32_t cp_hqd_hq_control1;		/* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;	/* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;	/* ordinal166 */
	uint32_t cp_hqd_eop_control;		/* ordinal167 */
	uint32_t cp_hqd_eop_rptr;		/* ordinal168 */
	uint32_t cp_hqd_eop_wptr;		/* ordinal169 */
	uint32_t cp_hqd_eop_done_events;	/* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;	/* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;	/* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;	/* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;	/* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;	/* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;	/* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;		/* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;	/* ordinal178 */
	uint32_t cp_hqd_error;			/* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;		/* ordinal180 */
	uint32_t cp_hqd_eop_dones;		/* ordinal181 */
	uint32_t reserved46;			/* ordinal182 */
	uint32_t reserved47;			/* ordinal183 */
	uint32_t reserved48;			/* ordinal184 */
	uint32_t reserved49;			/* ordinal185 */
	uint32_t reserved50;			/* ordinal186 */
	uint32_t reserved51;			/* ordinal187 */
	uint32_t reserved52;			/* ordinal188 */
	uint32_t reserved53;			/* ordinal189 */
	uint32_t reserved54;			/* ordinal190 */
	uint32_t reserved55;			/* ordinal191 */
	uint32_t iqtimer_pkt_header;		/* ordinal192 */
	uint32_t iqtimer_pkt_dw0;		/* ordinal193 */
	uint32_t iqtimer_pkt_dw1;		/* ordinal194 */
	uint32_t iqtimer_pkt_dw2;		/* ordinal195 */
	uint32_t iqtimer_pkt_dw3;		/* ordinal196 */
	uint32_t iqtimer_pkt_dw4;		/* ordinal197 */
	uint32_t iqtimer_pkt_dw5;		/* ordinal198 */
	uint32_t iqtimer_pkt_dw6;		/* ordinal199 */
	uint32_t iqtimer_pkt_dw7;		/* ordinal200 */
	uint32_t iqtimer_pkt_dw8;		/* ordinal201 */
	uint32_t iqtimer_pkt_dw9;		/* ordinal202 */
	uint32_t iqtimer_pkt_dw10;		/* ordinal203 */
	uint32_t iqtimer_pkt_dw11;		/* ordinal204 */
	uint32_t iqtimer_pkt_dw12;		/* ordinal205 */
	uint32_t iqtimer_pkt_dw13;		/* ordinal206 */
	uint32_t iqtimer_pkt_dw14;		/* ordinal207 */
	uint32_t iqtimer_pkt_dw15;		/* ordinal208 */
	uint32_t iqtimer_pkt_dw16;		/* ordinal209 */
	uint32_t iqtimer_pkt_dw17;		/* ordinal210 */
	uint32_t iqtimer_pkt_dw18;		/* ordinal211 */
	uint32_t iqtimer_pkt_dw19;		/* ordinal212 */
	uint32_t iqtimer_pkt_dw20;		/* ordinal213 */
	uint32_t iqtimer_pkt_dw21;		/* ordinal214 */
	uint32_t iqtimer_pkt_dw22;		/* ordinal215 */
	uint32_t iqtimer_pkt_dw23;		/* ordinal216 */
	uint32_t iqtimer_pkt_dw24;		/* ordinal217 */
	uint32_t iqtimer_pkt_dw25;		/* ordinal218 */
	uint32_t iqtimer_pkt_dw26;		/* ordinal219 */
	uint32_t iqtimer_pkt_dw27;		/* ordinal220 */
	uint32_t iqtimer_pkt_dw28;		/* ordinal221 */
	uint32_t iqtimer_pkt_dw29;		/* ordinal222 */
	uint32_t iqtimer_pkt_dw30;		/* ordinal223 */
	uint32_t iqtimer_pkt_dw31;		/* ordinal224 */
	uint32_t reserved56;			/* ordinal225 */
	uint32_t reserved57;			/* ordinal226 */
	uint32_t reserved58;			/* ordinal227 */
	uint32_t set_resources_header;		/* ordinal228 */
	uint32_t set_resources_dw1;		/* ordinal229 */
	uint32_t set_resources_dw2;		/* ordinal230 */
	uint32_t set_resources_dw3;		/* ordinal231 */
	uint32_t set_resources_dw4;		/* ordinal232 */
	uint32_t set_resources_dw5;		/* ordinal233 */
	uint32_t set_resources_dw6;		/* ordinal234 */
	uint32_t set_resources_dw7;		/* ordinal235 */
	uint32_t reserved59;			/* ordinal236 */
	uint32_t reserved60;			/* ordinal237 */
	uint32_t reserved61;			/* ordinal238 */
	uint32_t reserved62;			/* ordinal239 */
	uint32_t reserved63;			/* ordinal240 */
	uint32_t reserved64;			/* ordinal241 */
	uint32_t reserved65;			/* ordinal242 */
	uint32_t reserved66;			/* ordinal243 */
	uint32_t reserved67;			/* ordinal244 */
	uint32_t reserved68;			/* ordinal245 */
	uint32_t reserved69;			/* ordinal246 */
	uint32_t reserved70;			/* ordinal247 */
	uint32_t reserved71;			/* ordinal248 */
	uint32_t reserved72;			/* ordinal249 */
	uint32_t reserved73;			/* ordinal250 */
	uint32_t reserved74;			/* ordinal251 */
	uint32_t reserved75;			/* ordinal252 */
	uint32_t reserved76;			/* ordinal253 */
	uint32_t reserved77;			/* ordinal254 */
	uint32_t reserved78;			/* ordinal255 */

	uint32_t reserved_t[256];		/* Reserve 256 dword buffer used by ucode */
};

/*
 * gfx_v8_0_cp_compute_fini - release the per-ring MQD buffer objects
 *
 * Unpins, unreserves and drops the reference on each compute ring's MQD
 * BO.  Safe to call on partially initialized state (skips rings without
 * an MQD BO).
 */
static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
		}
	}
}

/*
 * gfx_v8_0_cp_compute_resume - bring up the compute queues
 *
 * First programs the per-pipe EOP buffers for all MEC pipes (pipes 0-3
 * map to MEC1, the rest to MEC2), then creates/maps an MQD per compute
 * ring, programs the HQD registers from it, activates each queue and
 * runs the ring tests.  Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues. Just two for now.
*/
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* the HQD registers below are banked per me/pipe/queue;
		 * hold srbm_mutex for the whole programming sequence */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address wether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}

/*
 * gfx_v8_0_cp_resume - load CP firmware (if needed) and start all rings
 *
 * When neither powerplay nor SMU loading is active, uses the legacy
 * MMIO loaders; otherwise waits for the SMU to finish loading each CP
 * image (Topaz has no SMU MEC loading and always uses the legacy path
 * for compute).  Then resumes the gfx and compute rings.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}

/* gfx_v8_0_cp_enable - enable/disable both the gfx and compute CP engines */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

/* gfx_v8_0_hw_init - IP-block hw_init hook: golden regs, gpu init, RLC, CP */
static int gfx_v8_0_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}

static int gfx_v8_0_hw_fini(void *handle)
5093 { 5094 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5095 5096 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5097 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5098 gfx_v8_0_cp_enable(adev, false); 5099 gfx_v8_0_rlc_stop(adev); 5100 gfx_v8_0_cp_compute_fini(adev); 5101 5102 amdgpu_set_powergating_state(adev, 5103 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 5104 5105 return 0; 5106 } 5107 5108 static int gfx_v8_0_suspend(void *handle) 5109 { 5110 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5111 5112 return gfx_v8_0_hw_fini(adev); 5113 } 5114 5115 static int gfx_v8_0_resume(void *handle) 5116 { 5117 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5118 5119 return gfx_v8_0_hw_init(adev); 5120 } 5121 5122 static bool gfx_v8_0_is_idle(void *handle) 5123 { 5124 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5125 5126 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5127 return false; 5128 else 5129 return true; 5130 } 5131 5132 static int gfx_v8_0_wait_for_idle(void *handle) 5133 { 5134 unsigned i; 5135 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5136 5137 for (i = 0; i < adev->usec_timeout; i++) { 5138 if (gfx_v8_0_is_idle(handle)) 5139 return 0; 5140 5141 udelay(1); 5142 } 5143 return -ETIMEDOUT; 5144 } 5145 5146 static bool gfx_v8_0_check_soft_reset(void *handle) 5147 { 5148 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5149 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5150 u32 tmp; 5151 5152 /* GRBM_STATUS */ 5153 tmp = RREG32(mmGRBM_STATUS); 5154 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5155 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5156 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5157 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5158 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5159 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5160 
GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* stash the result for the pre/soft/post reset callbacks */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}

/*
 * Request dequeue of @ring's HQD (hardware queue descriptor) and wait
 * for it to go inactive.  NOTE(review): the SRBM select is left on the
 * ring's me/pipe/queue here — presumably the caller restores it;
 * confirm against the callers.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}

/*
 * Quiesce the engines flagged by check_soft_reset before the actual
 * reset: stop the RLC, halt the gfx CP, drain the compute queues and
 * disable the MEC.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_inactive_hqd(adev, ring);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}

/*
 * Execute the GRBM/SRBM soft-reset sequence recorded by
 * check_soft_reset: stall the GMC, pulse the reset bits (read back
 * after each write to post it), then release the stall.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (grbm_soft_reset || srbm_soft_reset) {
		/* stall GFX traffic in the memory controller during reset */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		/* release the GMC stall */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}

/* Clear @ring's HQD dequeue request and ring pointers, restoring the
 * SRBM select afterwards. */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}

/* Restart the engines that were soft-reset: gfx CP, compute queues
 * and finally the RLC. */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}

/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* latch the counter, then read LSB before MSB, under the mutex */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

/*
 * Emit WRITE_DATA packets programming the GDS/GWS/OA allocation
 * registers for @vmid.  Sizes/bases are converted from bytes to
 * hardware units via the AMDGPU_*_SHIFT constants first.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	/* OA value is a bitmask: bits [oa_base, oa_base + oa_size) set */
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}

static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};

/* Early init: set ring counts and install the gfx/ring/irq/gds/rlc
 * function tables. */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}

/* Late init: enable the privileged reg/inst interrupts, run the EDC
 * GPR workaround (needs the IB pool, hence late) and gate GFX power. */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}

/* Toggle static per-CU power gating; Polaris11 additionally notifies
 * the SMU through powerplay first. */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if (adev->asic_type == CHIP_POLARIS11)
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

/* Toggle dynamic per-CU power gating. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

/* Toggle "quick" power gating (Polaris11 only caller). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

/* Toggle coarse GFX power gating (Carrizo/Stoney path). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

/* Toggle GFX pipeline power gating (Carrizo/Stoney path). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}

/* Apply/remove coarse power gating, plus pipeline PG if supported. */
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}

/* IP-block set_powergating_state: route to the per-ASIC PG helpers
 * according to the supported pg_flags. */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
			cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}

/*
 * Broadcast a BPM command (@cmd) for register @reg_addr to all CUs
 * via the RLC serdes write interface.  Note the Stoney path does not
 * clear the BPM_DATA/REG_ADDR fields before OR-ing them in.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every SE/SH/CU */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}

#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK           0x00000001
#define RLC_GPR_REG2__REQ__SHIFT         0
#define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e

/*
 * Ask the RLC firmware (via RLC_GPR_REG2) to enter safe mode, wait for
 * the GFX clock/power status bits to confirm, then wait for the REQ
 * bit to clear.  No-op when the RLC is not running or no CG/PG feature
 * that needs safe mode is enabled.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

/* Counterpart of cz_enter_rlc_safe_mode: send the exit message and
 * wait for the REQ bit to clear. */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}

/* Iceland variant: safe mode is requested through RLC_SAFE_MODE
 * instead of RLC_GPR_REG2. */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++)
{
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

/* Iceland variant: clear the safe-mode request (only if currently in
 * safe mode) and wait for the CMD bit to clear. */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

/* No-op safe-mode hooks for ASICs that do not need the handshake;
 * they only track the bookkeeping flag. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}

static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}

static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};

/*
 * Enable/disable medium-grain clock gating (MGCG) plus the related
 * memory light-sleep and CGTS features, under RLC safe mode.  The
 * numbered comments mark the hardware programming sequence.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

/*
 * Enable/disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS), under RLC safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

/* Order matters: MGCG before CGCG when enabling, reverse on disable. */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * === MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * === CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}

/* Tonga: request GFX CG/MG clock gating from the SMU via powerplay. */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state;
	void *pp_handle = adev->powerplay.pp_handle;

	if (state == AMD_CG_STATE_UNGATE)
		pp_state = 0;
	else
		pp_state = PP_STATE_CG | PP_STATE_LS;

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			      PP_BLOCK_GFX_CG,
			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			      pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			      PP_BLOCK_GFX_MG,
			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			      pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	return 0;
}

/* Polaris: request CG/3D/MG/RLC/CP clock gating from the SMU. */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state;
6023 void *pp_handle = adev->powerplay.pp_handle; 6024 6025 if (state == AMD_CG_STATE_UNGATE) 6026 pp_state = 0; 6027 else 6028 pp_state = PP_STATE_CG | PP_STATE_LS; 6029 6030 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6031 PP_BLOCK_GFX_CG, 6032 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6033 pp_state); 6034 amd_set_clockgating_by_smu(pp_handle, msg_id); 6035 6036 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6037 PP_BLOCK_GFX_3D, 6038 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6039 pp_state); 6040 amd_set_clockgating_by_smu(pp_handle, msg_id); 6041 6042 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6043 PP_BLOCK_GFX_MG, 6044 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6045 pp_state); 6046 amd_set_clockgating_by_smu(pp_handle, msg_id); 6047 6048 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6049 PP_BLOCK_GFX_RLC, 6050 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6051 pp_state); 6052 amd_set_clockgating_by_smu(pp_handle, msg_id); 6053 6054 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6055 PP_BLOCK_GFX_CP, 6056 PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS, 6057 pp_state); 6058 amd_set_clockgating_by_smu(pp_handle, msg_id); 6059 6060 return 0; 6061 } 6062 6063 static int gfx_v8_0_set_clockgating_state(void *handle, 6064 enum amd_clockgating_state state) 6065 { 6066 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6067 6068 switch (adev->asic_type) { 6069 case CHIP_FIJI: 6070 case CHIP_CARRIZO: 6071 case CHIP_STONEY: 6072 gfx_v8_0_update_gfx_clock_gating(adev, 6073 state == AMD_CG_STATE_GATE ? 
true : false); 6074 break; 6075 case CHIP_TONGA: 6076 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6077 break; 6078 case CHIP_POLARIS10: 6079 case CHIP_POLARIS11: 6080 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6081 break; 6082 default: 6083 break; 6084 } 6085 return 0; 6086 } 6087 6088 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6089 { 6090 return ring->adev->wb.wb[ring->rptr_offs]; 6091 } 6092 6093 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6094 { 6095 struct amdgpu_device *adev = ring->adev; 6096 6097 if (ring->use_doorbell) 6098 /* XXX check if swapping is necessary on BE */ 6099 return ring->adev->wb.wb[ring->wptr_offs]; 6100 else 6101 return RREG32(mmCP_RB0_WPTR); 6102 } 6103 6104 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6105 { 6106 struct amdgpu_device *adev = ring->adev; 6107 6108 if (ring->use_doorbell) { 6109 /* XXX check if swapping is necessary on BE */ 6110 adev->wb.wb[ring->wptr_offs] = ring->wptr; 6111 WDOORBELL32(ring->doorbell_index, ring->wptr); 6112 } else { 6113 WREG32(mmCP_RB0_WPTR, ring->wptr); 6114 (void)RREG32(mmCP_RB0_WPTR); 6115 } 6116 } 6117 6118 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6119 { 6120 u32 ref_and_mask, reg_mem_engine; 6121 6122 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) { 6123 switch (ring->me) { 6124 case 1: 6125 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6126 break; 6127 case 2: 6128 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6129 break; 6130 default: 6131 return; 6132 } 6133 reg_mem_engine = 0; 6134 } else { 6135 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6136 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6137 } 6138 6139 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6140 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6141 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6142 reg_mem_engine)); 6143 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

/* Emit a WRITE_DATA packet that writes 1 to mmHDP_DEBUG0 — used by this
 * driver as the HDP invalidate sequence.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}

/* Emit an indirect buffer on the gfx ring.  CE IBs use the
 * INDIRECT_BUFFER_CONST variant; the control dword carries the IB size
 * in dwords plus the VMID in bits 24-31.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length in dwords, VMID in the top byte */
	control |= ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  /* byte-swap control for big-endian hosts */
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

/* Emit an indirect buffer on a compute ring; same layout as the gfx
 * variant but always INDIRECT_BUFFER with the VALID bit set.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

/* Emit a fence on the gfx ring: an EVENT_WRITE_EOP that flushes the TC
 * caches, writes @seq to @addr and optionally raises an interrupt,
 * depending on @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}

/* Make the ring wait until its own fence sequence number reaches the
 * last synced value before running subsequent commands.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

/* Emit a VM flush: write the page-directory base for @vm_id, invalidate
 * the VM context and wait for completion.  On the gfx ring this is
 * bracketed with NOPs and a PFP sync to keep the CE/DE in step.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	/* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */
	if (usepfp)
		amdgpu_ring_insert_nop(ring, 128);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}

/* Compute rings always use the doorbell path, so the write pointer is
 * read straight from the writeback slot.
 */
static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

/* Publish the compute ring write pointer via writeback slot + doorbell. */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}

/* Emit a fence on a compute ring using RELEASE_MEM (the MEC counterpart
 * of EVENT_WRITE_EOP): flush TC caches, write @seq to @addr and
 * optionally raise an interrupt depending on @flags.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

/* Emit a SWITCH_BUFFER packet (used around CE preambles). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

/* Emit a CONTEXT_CONTROL packet.  The dw2 bitfield selects which state
 * groups the CP should load; the exact bits depend on whether this
 * submission performs a context switch and/or carries a preamble IB.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if a preamble is presented for the first
		 * time, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

/* Worst-case dword count of one gfx IB emission (for ring sizing). */
static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring)
{
	return
		4; /* gfx_v8_0_ring_emit_ib_gfx */
}

/* Worst-case dword count of one full gfx submission frame, summed from
 * the individual emit helpers (for ring sizing).
 */
static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3; /* gfx_v8_ring_emit_cntxcntl */
}

/* Worst-case dword count of one compute IB emission (for ring sizing). */
static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring)
{
	return
		4; /* gfx_v8_0_ring_emit_ib_compute */
}

/* Worst-case dword count of one full compute submission frame. */
static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring)
{
	return
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
}

/* Enable/disable the end-of-pipe timestamp interrupt on the gfx ring. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

/* Enable/disable the EOP interrupt for MEC1 pipe 0; any other me/pipe
 * combination is rejected (see comment below).
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

/* irq-source .set callback: toggle the privileged-register fault
 * interrupt on the gfx ring.
 */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

/* irq-source .set callback: toggle the privileged-instruction fault
 * interrupt on the gfx ring.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ?
		     0 : 1);

	return 0;
}

/* irq-source .set callback: route an EOP interrupt-state request to the
 * gfx ring or the matching MEC me/pipe.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

/* EOP interrupt handler: decode me/pipe/queue from the IV ring_id and
 * run fence processing on the ring(s) that match.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id encoding: bits 2-3 = me, bits 0-1 = pipe, bits 4-6 = queue */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* me 0 is the gfx ring */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead
			 * of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

/* Privileged-register fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

/* Privileged-instruction fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

/* IP-block callback table exported to the amdgpu core. */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};

/* Ring callback table for the gfx ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx,
};

/* Ring callback table for the compute rings (no switch-buffer or
 * context-control hooks).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute,
	.get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute,
};

/* Attach the gfx/compute callback tables to every ring. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

/* Register the three GFX interrupt sources (EOP, priv-reg fault,
 * priv-instruction fault) with their callback tables.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}

/* Select the per-ASIC RLC callback table; ASICs without a dedicated
 * implementation get the no-op table.
 */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
		break;
	case CHIP_STONEY:
	case CHIP_CARRIZO:
		adev->gfx.rlc.funcs = &cz_rlc_funcs;
		break;
	default:
		adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
		break;
	}
}

/* Initialize the GDS (global data share) partition sizes.  The mem
 * partition split depends on the total GDS size reported by the
 * GDS_VMID0_SIZE register.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}

/* Write a user-requested CU-disable bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG for the currently selected SE/SH.
 * A zero bitmap leaves the register untouched.
 */
static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

/* Return the active-CU bitmap for the currently selected SE/SH:
 * CUs inactive in either the fuse config or the user config are
 * cleared, then the result is masked to max_cu_per_sh bits.
 */
static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}

/* Walk every SE/SH, apply any user CU-disable masks, and populate
 * adev->gfx.cu_info with the per-SH active-CU bitmaps, the total active
 * CU count and the always-on CU mask (first two CUs of each SH).
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	/* disable_masks holds up to 4 SEs x 2 SHs of user-requested masks */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; the first two per SH are
			 * recorded as always-on */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* restore broadcast SE/SH selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}