1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 */ 23 #include <linux/firmware.h> 24 #include "drmP.h" 25 #include "amdgpu.h" 26 #include "amdgpu_gfx.h" 27 #include "vi.h" 28 #include "vid.h" 29 #include "amdgpu_ucode.h" 30 #include "amdgpu_atombios.h" 31 #include "atombios_i2c.h" 32 #include "clearstate_vi.h" 33 34 #include "gmc/gmc_8_2_d.h" 35 #include "gmc/gmc_8_2_sh_mask.h" 36 37 #include "oss/oss_3_0_d.h" 38 #include "oss/oss_3_0_sh_mask.h" 39 40 #include "bif/bif_5_0_d.h" 41 #include "bif/bif_5_0_sh_mask.h" 42 43 #include "gca/gfx_8_0_d.h" 44 #include "gca/gfx_8_0_enum.h" 45 #include "gca/gfx_8_0_sh_mask.h" 46 #include "gca/gfx_8_0_enum.h" 47 48 #include "dce/dce_10_0_d.h" 49 #include "dce/dce_10_0_sh_mask.h" 50 51 #include "smu/smu_7_1_3_d.h" 52 53 #define GFX8_NUM_GFX_RINGS 1 54 #define GFX8_NUM_COMPUTE_RINGS 8 55 56 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 57 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 58 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 59 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 60 61 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) 62 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) 63 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) 64 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) 65 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT) 66 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT) 67 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT) 68 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT) 69 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT) 70 71 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L 72 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L 73 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L 74 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L 75 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L 76 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 
0x00000020L 77 78 /* BPM SERDES CMD */ 79 #define SET_BPM_SERDES_CMD 1 80 #define CLE_BPM_SERDES_CMD 0 81 82 /* BPM Register Address*/ 83 enum { 84 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */ 85 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */ 86 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */ 87 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 88 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 89 BPM_REG_FGCG_MAX 90 }; 91 92 #define RLC_FormatDirectRegListLength 14 93 94 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); 95 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); 96 MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); 97 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin"); 98 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 99 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 100 101 MODULE_FIRMWARE("amdgpu/stoney_ce.bin"); 102 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin"); 103 MODULE_FIRMWARE("amdgpu/stoney_me.bin"); 104 MODULE_FIRMWARE("amdgpu/stoney_mec.bin"); 105 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin"); 106 107 MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 108 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 109 MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 110 MODULE_FIRMWARE("amdgpu/tonga_mec.bin"); 111 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin"); 112 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin"); 113 114 MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); 115 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); 116 MODULE_FIRMWARE("amdgpu/topaz_me.bin"); 117 MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); 118 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); 119 120 MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); 121 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin"); 122 MODULE_FIRMWARE("amdgpu/fiji_me.bin"); 123 MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 124 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 125 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 126 127 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); 128 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); 129 MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); 130 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); 131 
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

/* Per-VMID GDS base/size and GWS/OA register offsets, indexed by VMID 0-15. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

/*
 * All tables below are {register, mask, value} triples consumed by
 * amdgpu_program_register_sequence() from gfx_v8_0_init_golden_registers().
 * The hex values are hardware "golden" settings; do not edit by hand.
 */

/* Tonga A11 golden register fixups. */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Tonga common golden settings (raster config, addr config, SPI reserves). */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Tonga medium-grain / coarse-grain clock-gating init sequence. */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* Per-CU (CU0-CU7) static clock-gating control values. */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

/* Polaris11 A11 golden register fixups. */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Polaris11 common golden settings. */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Polaris10 A11 golden register fixups. */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Polaris10 common golden settings. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Fiji common golden settings. */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

/* Fiji A10 golden register fixups. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Fiji MGCG/CGCG clock-gating init sequence. */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

/* Iceland (Topaz) A11 golden register fixups. */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

/* Iceland common golden settings. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Iceland MGCG/CGCG clock-gating init sequence (CU0-CU5 only). */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

/* Carrizo A11 golden register fixups. */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

/* Carrizo common golden settings. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Carrizo MGCG/CGCG clock-gating init sequence (CU0-CU7). */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

/* Stoney A11 golden register fixups. */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

/* Stoney common golden settings. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Stoney MGCG/CGCG clock-gating init sequence. */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};

/* Forward declarations for functions defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void
gfx_v8_0_get_cu_info(struct amdgpu_device *adev);

/*
 * Program the ASIC-specific "golden" register settings for the detected
 * asic_type: the MGCG/CGCG clock-gating init sequence, the per-revision
 * register fixups, and the common golden settings tables defined above.
 * Unknown ASIC types are silently left untouched.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		/* Polaris has no separate mgcg/cgcg init table here. */
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		/* ACLK setup via SMC indirect register; magic golden value. */
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * Board-specific workaround keyed on PCI revision and
		 * subsystem vendor/device IDs: two writes over the atombios
		 * i2c channel. NOTE(review): exact target device of these i2c
		 * transactions is not visible here — presumably an on-board
		 * clock/voltage part; confirm against the board documentation.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

/*
 * Initialize the GFX scratch register bookkeeping: 7 scratch registers
 * starting at mmSCRATCH_REG0, all marked free.
 */
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
		adev->gfx.scratch.free[i] = true;
		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
	}
}

/*
 * Basic ring liveness test: seed a scratch register with 0xCAFEDEAD via
 * MMIO, then submit a SET_UCONFIG_REG packet that writes 0xDEADBEEF to it,
 * and poll (up to adev->usec_timeout microseconds) until the CP has
 * executed the write. Returns 0 on success, negative errno on failure.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	/* 3 dwords: packet header + register offset + value. */
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* Busy-poll until the CP write lands or we time out. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

/*
 * Indirect-buffer test: same scratch-register handshake as the ring test,
 * but the SET_UCONFIG_REG packet is submitted through an IB and completion
 * is detected by waiting on the returned fence with the given timeout.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto
err2; 835 } 836 tmp = RREG32(scratch); 837 if (tmp == 0xDEADBEEF) { 838 DRM_INFO("ib test on ring %d succeeded\n", ring->idx); 839 r = 0; 840 } else { 841 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 842 scratch, tmp); 843 r = -EINVAL; 844 } 845 err2: 846 amdgpu_ib_free(adev, &ib, NULL); 847 fence_put(f); 848 err1: 849 amdgpu_gfx_scratch_free(adev, scratch); 850 return r; 851 } 852 853 854 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { 855 release_firmware(adev->gfx.pfp_fw); 856 adev->gfx.pfp_fw = NULL; 857 release_firmware(adev->gfx.me_fw); 858 adev->gfx.me_fw = NULL; 859 release_firmware(adev->gfx.ce_fw); 860 adev->gfx.ce_fw = NULL; 861 release_firmware(adev->gfx.rlc_fw); 862 adev->gfx.rlc_fw = NULL; 863 release_firmware(adev->gfx.mec_fw); 864 adev->gfx.mec_fw = NULL; 865 if ((adev->asic_type != CHIP_STONEY) && 866 (adev->asic_type != CHIP_TOPAZ)) 867 release_firmware(adev->gfx.mec2_fw); 868 adev->gfx.mec2_fw = NULL; 869 870 kfree(adev->gfx.rlc.register_list_format); 871 } 872 873 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 874 { 875 const char *chip_name; 876 char fw_name[30]; 877 int err; 878 struct amdgpu_firmware_info *info = NULL; 879 const struct common_firmware_header *header = NULL; 880 const struct gfx_firmware_header_v1_0 *cp_hdr; 881 const struct rlc_firmware_header_v2_0 *rlc_hdr; 882 unsigned int *tmp = NULL, i; 883 884 DRM_DEBUG("\n"); 885 886 switch (adev->asic_type) { 887 case CHIP_TOPAZ: 888 chip_name = "topaz"; 889 break; 890 case CHIP_TONGA: 891 chip_name = "tonga"; 892 break; 893 case CHIP_CARRIZO: 894 chip_name = "carrizo"; 895 break; 896 case CHIP_FIJI: 897 chip_name = "fiji"; 898 break; 899 case CHIP_POLARIS11: 900 chip_name = "polaris11"; 901 break; 902 case CHIP_POLARIS10: 903 chip_name = "polaris10"; 904 break; 905 case CHIP_STONEY: 906 chip_name = "stoney"; 907 break; 908 default: 909 BUG(); 910 } 911 912 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 913 err = 
request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 914 if (err) 915 goto out; 916 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 917 if (err) 918 goto out; 919 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 920 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 921 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 922 923 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 924 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 925 if (err) 926 goto out; 927 err = amdgpu_ucode_validate(adev->gfx.me_fw); 928 if (err) 929 goto out; 930 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 931 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 932 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 933 934 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 935 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 936 if (err) 937 goto out; 938 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 939 if (err) 940 goto out; 941 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 942 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 943 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 944 945 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 946 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 947 if (err) 948 goto out; 949 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 950 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 951 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 952 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 953 954 adev->gfx.rlc.save_and_restore_offset = 955 le32_to_cpu(rlc_hdr->save_and_restore_offset); 956 adev->gfx.rlc.clear_state_descriptor_offset = 957 
le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 958 adev->gfx.rlc.avail_scratch_ram_locations = 959 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 960 adev->gfx.rlc.reg_restore_list_size = 961 le32_to_cpu(rlc_hdr->reg_restore_list_size); 962 adev->gfx.rlc.reg_list_format_start = 963 le32_to_cpu(rlc_hdr->reg_list_format_start); 964 adev->gfx.rlc.reg_list_format_separate_start = 965 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 966 adev->gfx.rlc.starting_offsets_start = 967 le32_to_cpu(rlc_hdr->starting_offsets_start); 968 adev->gfx.rlc.reg_list_format_size_bytes = 969 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 970 adev->gfx.rlc.reg_list_size_bytes = 971 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 972 973 adev->gfx.rlc.register_list_format = 974 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 975 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 976 977 if (!adev->gfx.rlc.register_list_format) { 978 err = -ENOMEM; 979 goto out; 980 } 981 982 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 983 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 984 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) 985 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 986 987 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 988 989 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 990 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 991 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 992 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 993 994 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 995 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 996 if (err) 997 goto out; 998 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 999 if (err) 1000 goto out; 1001 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1002 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1003 adev->gfx.mec_feature_version = 
le32_to_cpu(cp_hdr->ucode_feature_version); 1004 1005 if ((adev->asic_type != CHIP_STONEY) && 1006 (adev->asic_type != CHIP_TOPAZ)) { 1007 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1008 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1009 if (!err) { 1010 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1011 if (err) 1012 goto out; 1013 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1014 adev->gfx.mec2_fw->data; 1015 adev->gfx.mec2_fw_version = 1016 le32_to_cpu(cp_hdr->header.ucode_version); 1017 adev->gfx.mec2_feature_version = 1018 le32_to_cpu(cp_hdr->ucode_feature_version); 1019 } else { 1020 err = 0; 1021 adev->gfx.mec2_fw = NULL; 1022 } 1023 } 1024 1025 if (adev->firmware.smu_load) { 1026 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1027 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1028 info->fw = adev->gfx.pfp_fw; 1029 header = (const struct common_firmware_header *)info->fw->data; 1030 adev->firmware.fw_size += 1031 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1032 1033 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1034 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1035 info->fw = adev->gfx.me_fw; 1036 header = (const struct common_firmware_header *)info->fw->data; 1037 adev->firmware.fw_size += 1038 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1039 1040 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1041 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1042 info->fw = adev->gfx.ce_fw; 1043 header = (const struct common_firmware_header *)info->fw->data; 1044 adev->firmware.fw_size += 1045 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1046 1047 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1048 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1049 info->fw = adev->gfx.rlc_fw; 1050 header = (const struct common_firmware_header *)info->fw->data; 1051 adev->firmware.fw_size += 1052 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1053 1054 info = 
&adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1055 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1056 info->fw = adev->gfx.mec_fw; 1057 header = (const struct common_firmware_header *)info->fw->data; 1058 adev->firmware.fw_size += 1059 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1060 1061 if (adev->gfx.mec2_fw) { 1062 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1063 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1064 info->fw = adev->gfx.mec2_fw; 1065 header = (const struct common_firmware_header *)info->fw->data; 1066 adev->firmware.fw_size += 1067 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1068 } 1069 1070 } 1071 1072 out: 1073 if (err) { 1074 dev_err(adev->dev, 1075 "gfx8: Failed to load firmware \"%s\"\n", 1076 fw_name); 1077 release_firmware(adev->gfx.pfp_fw); 1078 adev->gfx.pfp_fw = NULL; 1079 release_firmware(adev->gfx.me_fw); 1080 adev->gfx.me_fw = NULL; 1081 release_firmware(adev->gfx.ce_fw); 1082 adev->gfx.ce_fw = NULL; 1083 release_firmware(adev->gfx.rlc_fw); 1084 adev->gfx.rlc_fw = NULL; 1085 release_firmware(adev->gfx.mec_fw); 1086 adev->gfx.mec_fw = NULL; 1087 release_firmware(adev->gfx.mec2_fw); 1088 adev->gfx.mec2_fw = NULL; 1089 } 1090 return err; 1091 } 1092 1093 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, 1094 volatile u32 *buffer) 1095 { 1096 u32 count = 0, i; 1097 const struct cs_section_def *sect = NULL; 1098 const struct cs_extent_def *ext = NULL; 1099 1100 if (adev->gfx.rlc.cs_data == NULL) 1101 return; 1102 if (buffer == NULL) 1103 return; 1104 1105 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1106 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1107 1108 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1109 buffer[count++] = cpu_to_le32(0x80000000); 1110 buffer[count++] = cpu_to_le32(0x80000000); 1111 1112 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1113 for (ext = sect->section; ext->extent != NULL; ++ext) 
{ 1114 if (sect->id == SECT_CONTEXT) { 1115 buffer[count++] = 1116 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1117 buffer[count++] = cpu_to_le32(ext->reg_index - 1118 PACKET3_SET_CONTEXT_REG_START); 1119 for (i = 0; i < ext->reg_count; i++) 1120 buffer[count++] = cpu_to_le32(ext->extent[i]); 1121 } else { 1122 return; 1123 } 1124 } 1125 } 1126 1127 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 1128 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - 1129 PACKET3_SET_CONTEXT_REG_START); 1130 switch (adev->asic_type) { 1131 case CHIP_TONGA: 1132 case CHIP_POLARIS10: 1133 buffer[count++] = cpu_to_le32(0x16000012); 1134 buffer[count++] = cpu_to_le32(0x0000002A); 1135 break; 1136 case CHIP_POLARIS11: 1137 buffer[count++] = cpu_to_le32(0x16000012); 1138 buffer[count++] = cpu_to_le32(0x00000000); 1139 break; 1140 case CHIP_FIJI: 1141 buffer[count++] = cpu_to_le32(0x3a00161a); 1142 buffer[count++] = cpu_to_le32(0x0000002e); 1143 break; 1144 case CHIP_TOPAZ: 1145 case CHIP_CARRIZO: 1146 buffer[count++] = cpu_to_le32(0x00000002); 1147 buffer[count++] = cpu_to_le32(0x00000000); 1148 break; 1149 case CHIP_STONEY: 1150 buffer[count++] = cpu_to_le32(0x00000000); 1151 buffer[count++] = cpu_to_le32(0x00000000); 1152 break; 1153 default: 1154 buffer[count++] = cpu_to_le32(0x00000000); 1155 buffer[count++] = cpu_to_le32(0x00000000); 1156 break; 1157 } 1158 1159 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1160 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1161 1162 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1163 buffer[count++] = cpu_to_le32(0); 1164 } 1165 1166 static void cz_init_cp_jump_table(struct amdgpu_device *adev) 1167 { 1168 const __le32 *fw_data; 1169 volatile u32 *dst_ptr; 1170 int me, i, max_me = 4; 1171 u32 bo_offset = 0; 1172 u32 table_offset, table_size; 1173 1174 if (adev->asic_type == CHIP_CARRIZO) 1175 max_me = 5; 1176 1177 /* write the cp table buffer */ 1178 
dst_ptr = adev->gfx.rlc.cp_table_ptr; 1179 for (me = 0; me < max_me; me++) { 1180 if (me == 0) { 1181 const struct gfx_firmware_header_v1_0 *hdr = 1182 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1183 fw_data = (const __le32 *) 1184 (adev->gfx.ce_fw->data + 1185 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1186 table_offset = le32_to_cpu(hdr->jt_offset); 1187 table_size = le32_to_cpu(hdr->jt_size); 1188 } else if (me == 1) { 1189 const struct gfx_firmware_header_v1_0 *hdr = 1190 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1191 fw_data = (const __le32 *) 1192 (adev->gfx.pfp_fw->data + 1193 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1194 table_offset = le32_to_cpu(hdr->jt_offset); 1195 table_size = le32_to_cpu(hdr->jt_size); 1196 } else if (me == 2) { 1197 const struct gfx_firmware_header_v1_0 *hdr = 1198 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1199 fw_data = (const __le32 *) 1200 (adev->gfx.me_fw->data + 1201 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1202 table_offset = le32_to_cpu(hdr->jt_offset); 1203 table_size = le32_to_cpu(hdr->jt_size); 1204 } else if (me == 3) { 1205 const struct gfx_firmware_header_v1_0 *hdr = 1206 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1207 fw_data = (const __le32 *) 1208 (adev->gfx.mec_fw->data + 1209 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1210 table_offset = le32_to_cpu(hdr->jt_offset); 1211 table_size = le32_to_cpu(hdr->jt_size); 1212 } else if (me == 4) { 1213 const struct gfx_firmware_header_v1_0 *hdr = 1214 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 1215 fw_data = (const __le32 *) 1216 (adev->gfx.mec2_fw->data + 1217 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1218 table_offset = le32_to_cpu(hdr->jt_offset); 1219 table_size = le32_to_cpu(hdr->jt_size); 1220 } 1221 1222 for (i = 0; i < table_size; i ++) { 1223 dst_ptr[bo_offset + i] = 1224 
cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); 1225 } 1226 1227 bo_offset += table_size; 1228 } 1229 } 1230 1231 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) 1232 { 1233 int r; 1234 1235 /* clear state block */ 1236 if (adev->gfx.rlc.clear_state_obj) { 1237 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1238 if (unlikely(r != 0)) 1239 dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r); 1240 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 1241 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1242 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1243 adev->gfx.rlc.clear_state_obj = NULL; 1244 } 1245 1246 /* jump table block */ 1247 if (adev->gfx.rlc.cp_table_obj) { 1248 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); 1249 if (unlikely(r != 0)) 1250 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); 1251 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj); 1252 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 1253 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj); 1254 adev->gfx.rlc.cp_table_obj = NULL; 1255 } 1256 } 1257 1258 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) 1259 { 1260 volatile u32 *dst_ptr; 1261 u32 dws; 1262 const struct cs_section_def *cs_data; 1263 int r; 1264 1265 adev->gfx.rlc.cs_data = vi_cs_data; 1266 1267 cs_data = adev->gfx.rlc.cs_data; 1268 1269 if (cs_data) { 1270 /* clear state block */ 1271 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); 1272 1273 if (adev->gfx.rlc.clear_state_obj == NULL) { 1274 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, 1275 AMDGPU_GEM_DOMAIN_VRAM, 1276 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, 1277 NULL, NULL, 1278 &adev->gfx.rlc.clear_state_obj); 1279 if (r) { 1280 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); 1281 gfx_v8_0_rlc_fini(adev); 1282 return r; 1283 } 1284 } 1285 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1286 if (unlikely(r != 0)) { 1287 gfx_v8_0_rlc_fini(adev); 1288 return r; 1289 } 1290 
r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, 1291 &adev->gfx.rlc.clear_state_gpu_addr); 1292 if (r) { 1293 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1294 dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r); 1295 gfx_v8_0_rlc_fini(adev); 1296 return r; 1297 } 1298 1299 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); 1300 if (r) { 1301 dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r); 1302 gfx_v8_0_rlc_fini(adev); 1303 return r; 1304 } 1305 /* set up the cs buffer */ 1306 dst_ptr = adev->gfx.rlc.cs_ptr; 1307 gfx_v8_0_get_csb_buffer(adev, dst_ptr); 1308 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); 1309 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1310 } 1311 1312 if ((adev->asic_type == CHIP_CARRIZO) || 1313 (adev->asic_type == CHIP_STONEY)) { 1314 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1315 if (adev->gfx.rlc.cp_table_obj == NULL) { 1316 r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true, 1317 AMDGPU_GEM_DOMAIN_VRAM, 1318 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, 1319 NULL, NULL, 1320 &adev->gfx.rlc.cp_table_obj); 1321 if (r) { 1322 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); 1323 return r; 1324 } 1325 } 1326 1327 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false); 1328 if (unlikely(r != 0)) { 1329 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r); 1330 return r; 1331 } 1332 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM, 1333 &adev->gfx.rlc.cp_table_gpu_addr); 1334 if (r) { 1335 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 1336 dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r); 1337 return r; 1338 } 1339 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr); 1340 if (r) { 1341 dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r); 1342 return r; 1343 } 1344 1345 cz_init_cp_jump_table(adev); 1346 1347 
amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); 1348 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 1349 } 1350 1351 return 0; 1352 } 1353 1354 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) 1355 { 1356 int r; 1357 1358 if (adev->gfx.mec.hpd_eop_obj) { 1359 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 1360 if (unlikely(r != 0)) 1361 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); 1362 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); 1363 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1364 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); 1365 adev->gfx.mec.hpd_eop_obj = NULL; 1366 } 1367 } 1368 1369 #define MEC_HPD_SIZE 2048 1370 1371 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) 1372 { 1373 int r; 1374 u32 *hpd; 1375 1376 /* 1377 * we assign only 1 pipe because all other pipes will 1378 * be handled by KFD 1379 */ 1380 adev->gfx.mec.num_mec = 1; 1381 adev->gfx.mec.num_pipe = 1; 1382 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; 1383 1384 if (adev->gfx.mec.hpd_eop_obj == NULL) { 1385 r = amdgpu_bo_create(adev, 1386 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, 1387 PAGE_SIZE, true, 1388 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, 1389 &adev->gfx.mec.hpd_eop_obj); 1390 if (r) { 1391 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1392 return r; 1393 } 1394 } 1395 1396 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 1397 if (unlikely(r != 0)) { 1398 gfx_v8_0_mec_fini(adev); 1399 return r; 1400 } 1401 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, 1402 &adev->gfx.mec.hpd_eop_gpu_addr); 1403 if (r) { 1404 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); 1405 gfx_v8_0_mec_fini(adev); 1406 return r; 1407 } 1408 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); 1409 if (r) { 1410 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); 1411 gfx_v8_0_mec_fini(adev); 1412 return r; 1413 } 1414 1415 memset(hpd, 0, adev->gfx.mec.num_mec 
*adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); 1416 1417 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1418 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1419 1420 return 0; 1421 } 1422 1423 static const u32 vgpr_init_compute_shader[] = 1424 { 1425 0x7e000209, 0x7e020208, 1426 0x7e040207, 0x7e060206, 1427 0x7e080205, 0x7e0a0204, 1428 0x7e0c0203, 0x7e0e0202, 1429 0x7e100201, 0x7e120200, 1430 0x7e140209, 0x7e160208, 1431 0x7e180207, 0x7e1a0206, 1432 0x7e1c0205, 0x7e1e0204, 1433 0x7e200203, 0x7e220202, 1434 0x7e240201, 0x7e260200, 1435 0x7e280209, 0x7e2a0208, 1436 0x7e2c0207, 0x7e2e0206, 1437 0x7e300205, 0x7e320204, 1438 0x7e340203, 0x7e360202, 1439 0x7e380201, 0x7e3a0200, 1440 0x7e3c0209, 0x7e3e0208, 1441 0x7e400207, 0x7e420206, 1442 0x7e440205, 0x7e460204, 1443 0x7e480203, 0x7e4a0202, 1444 0x7e4c0201, 0x7e4e0200, 1445 0x7e500209, 0x7e520208, 1446 0x7e540207, 0x7e560206, 1447 0x7e580205, 0x7e5a0204, 1448 0x7e5c0203, 0x7e5e0202, 1449 0x7e600201, 0x7e620200, 1450 0x7e640209, 0x7e660208, 1451 0x7e680207, 0x7e6a0206, 1452 0x7e6c0205, 0x7e6e0204, 1453 0x7e700203, 0x7e720202, 1454 0x7e740201, 0x7e760200, 1455 0x7e780209, 0x7e7a0208, 1456 0x7e7c0207, 0x7e7e0206, 1457 0xbf8a0000, 0xbf810000, 1458 }; 1459 1460 static const u32 sgpr_init_compute_shader[] = 1461 { 1462 0xbe8a0100, 0xbe8c0102, 1463 0xbe8e0104, 0xbe900106, 1464 0xbe920108, 0xbe940100, 1465 0xbe960102, 0xbe980104, 1466 0xbe9a0106, 0xbe9c0108, 1467 0xbe9e0100, 0xbea00102, 1468 0xbea20104, 0xbea40106, 1469 0xbea60108, 0xbea80100, 1470 0xbeaa0102, 0xbeac0104, 1471 0xbeae0106, 0xbeb00108, 1472 0xbeb20100, 0xbeb40102, 1473 0xbeb60104, 0xbeb80106, 1474 0xbeba0108, 0xbebc0100, 1475 0xbebe0102, 0xbec00104, 1476 0xbec20106, 0xbec40108, 1477 0xbec60100, 0xbec80102, 1478 0xbee60004, 0xbee70005, 1479 0xbeea0006, 0xbeeb0007, 1480 0xbee80008, 0xbee90009, 1481 0xbefc0000, 0xbf8a0000, 1482 0xbf810000, 0x00000000, 1483 }; 1484 1485 static const u32 vgpr_init_regs[] = 1486 { 1487 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, 1488 
mmCOMPUTE_RESOURCE_LIMITS, 0, 1489 mmCOMPUTE_NUM_THREAD_X, 256*4, 1490 mmCOMPUTE_NUM_THREAD_Y, 1, 1491 mmCOMPUTE_NUM_THREAD_Z, 1, 1492 mmCOMPUTE_PGM_RSRC2, 20, 1493 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1494 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1495 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1496 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1497 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1498 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1499 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1500 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1501 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1502 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1503 }; 1504 1505 static const u32 sgpr1_init_regs[] = 1506 { 1507 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f, 1508 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1509 mmCOMPUTE_NUM_THREAD_X, 256*5, 1510 mmCOMPUTE_NUM_THREAD_Y, 1, 1511 mmCOMPUTE_NUM_THREAD_Z, 1, 1512 mmCOMPUTE_PGM_RSRC2, 20, 1513 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1514 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1515 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1516 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1517 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1518 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1519 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1520 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1521 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1522 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1523 }; 1524 1525 static const u32 sgpr2_init_regs[] = 1526 { 1527 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0, 1528 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1529 mmCOMPUTE_NUM_THREAD_X, 256*5, 1530 mmCOMPUTE_NUM_THREAD_Y, 1, 1531 mmCOMPUTE_NUM_THREAD_Z, 1, 1532 mmCOMPUTE_PGM_RSRC2, 20, 1533 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1534 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1535 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1536 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1537 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1538 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1539 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1540 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1541 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1542 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1543 }; 1544 1545 static const u32 sec_ded_counter_registers[] = 1546 { 1547 
mmCPC_EDC_ATC_CNT, 1548 mmCPC_EDC_SCRATCH_CNT, 1549 mmCPC_EDC_UCODE_CNT, 1550 mmCPF_EDC_ATC_CNT, 1551 mmCPF_EDC_ROQ_CNT, 1552 mmCPF_EDC_TAG_CNT, 1553 mmCPG_EDC_ATC_CNT, 1554 mmCPG_EDC_DMA_CNT, 1555 mmCPG_EDC_TAG_CNT, 1556 mmDC_EDC_CSINVOC_CNT, 1557 mmDC_EDC_RESTORE_CNT, 1558 mmDC_EDC_STATE_CNT, 1559 mmGDS_EDC_CNT, 1560 mmGDS_EDC_GRBM_CNT, 1561 mmGDS_EDC_OA_DED, 1562 mmSPI_EDC_CNT, 1563 mmSQC_ATC_EDC_GATCL1_CNT, 1564 mmSQC_EDC_CNT, 1565 mmSQ_EDC_DED_CNT, 1566 mmSQ_EDC_INFO, 1567 mmSQ_EDC_SEC_CNT, 1568 mmTCC_EDC_CNT, 1569 mmTCP_ATC_EDC_GATCL1_CNT, 1570 mmTCP_EDC_CNT, 1571 mmTD_EDC_CNT 1572 }; 1573 1574 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 1575 { 1576 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 1577 struct amdgpu_ib ib; 1578 struct fence *f = NULL; 1579 int r, i; 1580 u32 tmp; 1581 unsigned total_size, vgpr_offset, sgpr_offset; 1582 u64 gpu_addr; 1583 1584 /* only supported on CZ */ 1585 if (adev->asic_type != CHIP_CARRIZO) 1586 return 0; 1587 1588 /* bail if the compute ring is not ready */ 1589 if (!ring->ready) 1590 return 0; 1591 1592 tmp = RREG32(mmGB_EDC_MODE); 1593 WREG32(mmGB_EDC_MODE, 0); 1594 1595 total_size = 1596 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1597 total_size += 1598 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1599 total_size += 1600 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1601 total_size = ALIGN(total_size, 256); 1602 vgpr_offset = total_size; 1603 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1604 sgpr_offset = total_size; 1605 total_size += sizeof(sgpr_init_compute_shader); 1606 1607 /* allocate an indirect buffer to put the commands in */ 1608 memset(&ib, 0, sizeof(ib)); 1609 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1610 if (r) { 1611 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1612 return r; 1613 } 1614 1615 /* load the compute shaders */ 1616 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1617 ib.ptr[i + 
(vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1618 1619 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1620 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1621 1622 /* init the ib length to 0 */ 1623 ib.length_dw = 0; 1624 1625 /* VGPR */ 1626 /* write the register state for the compute dispatch */ 1627 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1628 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1629 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1630 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1631 } 1632 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1633 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1634 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1635 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1636 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1637 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1638 1639 /* write dispatch packet */ 1640 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1641 ib.ptr[ib.length_dw++] = 8; /* x */ 1642 ib.ptr[ib.length_dw++] = 1; /* y */ 1643 ib.ptr[ib.length_dw++] = 1; /* z */ 1644 ib.ptr[ib.length_dw++] = 1645 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1646 1647 /* write CS partial flush packet */ 1648 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1649 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1650 1651 /* SGPR1 */ 1652 /* write the register state for the compute dispatch */ 1653 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1654 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1655 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1656 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1657 } 1658 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1659 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1660 ib.ptr[ib.length_dw++] = 
PACKET3(PACKET3_SET_SH_REG, 2); 1661 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1662 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1663 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1664 1665 /* write dispatch packet */ 1666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1667 ib.ptr[ib.length_dw++] = 8; /* x */ 1668 ib.ptr[ib.length_dw++] = 1; /* y */ 1669 ib.ptr[ib.length_dw++] = 1; /* z */ 1670 ib.ptr[ib.length_dw++] = 1671 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1672 1673 /* write CS partial flush packet */ 1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1675 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1676 1677 /* SGPR2 */ 1678 /* write the register state for the compute dispatch */ 1679 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1680 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1681 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1682 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1683 } 1684 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1685 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1687 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1688 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1689 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1690 1691 /* write dispatch packet */ 1692 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1693 ib.ptr[ib.length_dw++] = 8; /* x */ 1694 ib.ptr[ib.length_dw++] = 1; /* y */ 1695 ib.ptr[ib.length_dw++] = 1; /* z */ 1696 ib.ptr[ib.length_dw++] = 1697 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1698 1699 /* write CS partial flush packet */ 1700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1701 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1702 1703 /* shedule the ib on the ring */ 1704 r = 
amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* DED_MODE=2 / PROP_FED=1: enable double-error detection and
	 * propagate fatal errors; NOTE(review): tmp is carried in from
	 * earlier in this function (not visible here) — confirm it was
	 * read from mmGB_EDC_MODE before these fields are set.
	 */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	/* re-enable EDC (DIS_EDC=0) and force bit 0 on */
	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	/* common exit: release the IB and drop our fence reference */
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);

	return r;
}

/*
 * gfx_v8_0_gpu_early_init - fill in the per-ASIC gfx configuration
 *
 * @adev: amdgpu device pointer
 *
 * Populates adev->gfx.config (shader engine / CU / pipe counts, FIFO
 * sizes, tiling address configuration) based on the ASIC type, and for
 * APUs derives the memory row size from the fused DIMM address maps.
 * Returns 0 on success or a negative error code if the atombios gfx
 * info query fails (Polaris parts only).
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
		/* Polaris reads engine/CU/pipe topology from the vbios
		 * instead of hard-coding it here.
		 */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count is fused per SKU; map PCI revision to the
		 * B10/B8/B6/B4 bins.
		 */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count fused per SKU, keyed on PCI revision */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive row size from the MC arbiter column count */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size: encode mem_row_size_in_kb into GB_ADDR_CONFIG */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

/*
 * gfx_v8_0_sw_init - software-side gfx initialization
 *
 * @handle: amdgpu device pointer (as an opaque IP-block handle)
 *
 * Registers the EOP / privileged-reg / privileged-inst interrupt
 * sources, loads microcode, sets up RLC and MEC BOs, initializes the
 * gfx and compute rings, and reserves GDS/GWS/OA resources.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return
r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* 8 queues per pipe: derive pipe/queue from the flat index */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

/*
 * gfx_v8_0_sw_fini - software-side gfx teardown
 *
 * @handle: amdgpu device pointer (as an opaque IP-block handle)
 *
 * Releases everything gfx_v8_0_sw_init() created, in reverse order:
 * GDS/GWS/OA BOs, gfx and compute rings, then MEC, RLC and microcode.
 * Always returns 0.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}

/*
 * gfx_v8_0_tiling_mode_table_init - program the per-ASIC tiling tables
 *
 * @adev: amdgpu device pointer
 *
 * Builds the GB_TILE_MODE and GB_MACROTILE_MODE register tables in
 * adev->gfx.config (tile_mode_array / macrotile_mode_array), zeroing
 * unused entries, and writes them to the hardware.
 */
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	/* start from a clean slate; only entries used by the ASIC get set */
	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] =
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2155 PIPE_CONFIG(ADDR_SURF_P2) | 2156 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2157 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2158 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2159 PIPE_CONFIG(ADDR_SURF_P2) | 2160 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2161 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2162 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2163 PIPE_CONFIG(ADDR_SURF_P2) | 2164 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2165 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2166 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2167 PIPE_CONFIG(ADDR_SURF_P2) | 2168 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2169 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2170 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2171 PIPE_CONFIG(ADDR_SURF_P2) | 2172 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2173 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2174 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2175 PIPE_CONFIG(ADDR_SURF_P2) | 2176 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2177 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2178 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2179 PIPE_CONFIG(ADDR_SURF_P2)); 2180 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2181 PIPE_CONFIG(ADDR_SURF_P2) | 2182 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2183 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2184 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2185 PIPE_CONFIG(ADDR_SURF_P2) | 2186 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2187 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2188 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2189 PIPE_CONFIG(ADDR_SURF_P2) | 2190 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2191 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2192 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2193 PIPE_CONFIG(ADDR_SURF_P2) | 2194 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2196 
modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2197 PIPE_CONFIG(ADDR_SURF_P2) | 2198 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2199 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2200 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2201 PIPE_CONFIG(ADDR_SURF_P2) | 2202 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2204 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2205 PIPE_CONFIG(ADDR_SURF_P2) | 2206 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2207 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2208 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2209 PIPE_CONFIG(ADDR_SURF_P2) | 2210 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2211 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2212 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2213 PIPE_CONFIG(ADDR_SURF_P2) | 2214 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2216 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2217 PIPE_CONFIG(ADDR_SURF_P2) | 2218 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2219 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2220 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2221 PIPE_CONFIG(ADDR_SURF_P2) | 2222 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2223 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2224 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2225 PIPE_CONFIG(ADDR_SURF_P2) | 2226 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2227 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2228 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2229 PIPE_CONFIG(ADDR_SURF_P2) | 2230 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2231 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2232 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2233 PIPE_CONFIG(ADDR_SURF_P2) | 2234 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2235 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2236 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2237 
PIPE_CONFIG(ADDR_SURF_P2) | 2238 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2239 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2240 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2241 PIPE_CONFIG(ADDR_SURF_P2) | 2242 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2244 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2245 PIPE_CONFIG(ADDR_SURF_P2) | 2246 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2248 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2249 PIPE_CONFIG(ADDR_SURF_P2) | 2250 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2252 2253 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2256 NUM_BANKS(ADDR_SURF_8_BANK)); 2257 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2258 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2259 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2260 NUM_BANKS(ADDR_SURF_8_BANK)); 2261 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2262 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2263 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2264 NUM_BANKS(ADDR_SURF_8_BANK)); 2265 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2266 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2267 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2268 NUM_BANKS(ADDR_SURF_8_BANK)); 2269 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2270 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2271 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2272 NUM_BANKS(ADDR_SURF_8_BANK)); 2273 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2276 NUM_BANKS(ADDR_SURF_8_BANK)); 2277 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2278 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2279 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2280 NUM_BANKS(ADDR_SURF_8_BANK)); 2281 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2282 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2283 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2284 NUM_BANKS(ADDR_SURF_16_BANK)); 2285 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2288 NUM_BANKS(ADDR_SURF_16_BANK)); 2289 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2290 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2291 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2292 NUM_BANKS(ADDR_SURF_16_BANK)); 2293 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2294 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2295 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2296 NUM_BANKS(ADDR_SURF_16_BANK)); 2297 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2300 NUM_BANKS(ADDR_SURF_16_BANK)); 2301 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2302 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2303 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2304 NUM_BANKS(ADDR_SURF_16_BANK)); 2305 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2306 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2307 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2308 NUM_BANKS(ADDR_SURF_8_BANK)); 2309 2310 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2311 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2312 reg_offset != 23) 2313 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2314 2315 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2316 if (reg_offset != 7) 2317 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2318 2319 break; 2320 case CHIP_FIJI: 2321 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2322 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2323 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2324 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2325 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2326 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2327 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2328 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2329 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2330 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2331 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2332 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2333 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2334 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2335 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2336 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2337 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2338 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2339 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2340 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2341 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2342 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2343 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2344 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2345 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2346 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2347 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2348 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2349 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2350 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2351 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2352 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2353 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2354 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2355 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2356 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2357 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2359 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2360 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2361 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2362 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2363 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2364 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2365 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2367 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2368 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2369 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2371 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2372 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2373 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2375 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2376 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2377 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2379 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2380 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2381 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2382 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2383 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2384 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2385 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2386 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2387 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2388 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2389 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2391 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2392 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2393 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2394 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2395 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2396 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2397 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2398 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2399 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2400 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2401 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2402 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2403 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2404 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2405 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2407 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2408 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2409 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2410 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2411 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2412 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2413 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2414 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2415 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2416 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2417 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2418 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2419 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2420 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2421 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2423 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2424 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2425 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2427 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2428 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2429 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2431 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2432 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2433 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2435 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2436 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2437 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2439 modearray[30] = 
(ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2440 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2441 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2443 2444 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2445 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2446 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2447 NUM_BANKS(ADDR_SURF_8_BANK)); 2448 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2449 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2450 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2451 NUM_BANKS(ADDR_SURF_8_BANK)); 2452 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2455 NUM_BANKS(ADDR_SURF_8_BANK)); 2456 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2457 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2458 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2459 NUM_BANKS(ADDR_SURF_8_BANK)); 2460 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2461 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2462 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2463 NUM_BANKS(ADDR_SURF_8_BANK)); 2464 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2467 NUM_BANKS(ADDR_SURF_8_BANK)); 2468 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2469 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2470 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2471 NUM_BANKS(ADDR_SURF_8_BANK)); 2472 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2473 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2474 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2475 NUM_BANKS(ADDR_SURF_8_BANK)); 2476 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2479 NUM_BANKS(ADDR_SURF_8_BANK)); 2480 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2481 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2482 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2483 NUM_BANKS(ADDR_SURF_8_BANK)); 2484 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2485 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2486 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2487 NUM_BANKS(ADDR_SURF_8_BANK)); 2488 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2491 NUM_BANKS(ADDR_SURF_8_BANK)); 2492 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2495 NUM_BANKS(ADDR_SURF_8_BANK)); 2496 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2499 NUM_BANKS(ADDR_SURF_4_BANK)); 2500 2501 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2502 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2503 2504 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2505 if (reg_offset != 7) 2506 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2507 2508 break; 2509 case CHIP_TONGA: 2510 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2511 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2512 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2513 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2514 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2515 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2516 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2518 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2519 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2520 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2521 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2522 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2523 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2524 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2525 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2526 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2527 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2528 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2529 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2530 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2531 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2532 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2533 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2534 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2535 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2536 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2537 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2538 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2539 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2540 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2541 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2542 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2543 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2544 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2545 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2546 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2548 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2549 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2550 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2552 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2553 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2554 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2555 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2556 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2557 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2558 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2560 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2561 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2562 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2564 
modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2565 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2566 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2567 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2568 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2569 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2570 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2571 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2572 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2573 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2574 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2576 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2577 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2578 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2579 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2580 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2581 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2582 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2583 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2584 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2585 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2586 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2587 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2588 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2589 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2590 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2591 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2592 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2593 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2594 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2595 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2596 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2597 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2598 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2599 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2600 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2601 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2602 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2603 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2604 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2605 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2606 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2607 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2608 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2609 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2610 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2612 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2613 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2614 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2615 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2616 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2617 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2618 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2619 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2620 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2621 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2622 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2624 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2625 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2626 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2627 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2628 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2629 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2630 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2631 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2632 2633 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2634 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2635 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2636 NUM_BANKS(ADDR_SURF_16_BANK)); 2637 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2638 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2639 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2640 NUM_BANKS(ADDR_SURF_16_BANK)); 2641 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2642 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2643 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2644 NUM_BANKS(ADDR_SURF_16_BANK)); 2645 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2646 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2647 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2648 NUM_BANKS(ADDR_SURF_16_BANK)); 2649 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2650 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2651 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2652 NUM_BANKS(ADDR_SURF_16_BANK)); 2653 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2654 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2655 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2656 NUM_BANKS(ADDR_SURF_16_BANK)); 2657 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2658 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2659 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2660 NUM_BANKS(ADDR_SURF_16_BANK)); 2661 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2662 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2663 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2664 NUM_BANKS(ADDR_SURF_16_BANK)); 2665 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2666 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2667 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2668 NUM_BANKS(ADDR_SURF_16_BANK)); 2669 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2670 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2671 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2672 NUM_BANKS(ADDR_SURF_16_BANK)); 2673 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2674 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2675 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2676 NUM_BANKS(ADDR_SURF_16_BANK)); 2677 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2678 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2679 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2680 NUM_BANKS(ADDR_SURF_8_BANK)); 2681 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2682 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2683 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2684 NUM_BANKS(ADDR_SURF_4_BANK)); 2685 mod2array[14] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2686 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2687 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2688 NUM_BANKS(ADDR_SURF_4_BANK)); 2689 2690 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2691 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2692 2693 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2694 if (reg_offset != 7) 2695 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2696 2697 break; 2698 case CHIP_POLARIS11: 2699 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2700 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2701 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2702 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2703 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2704 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2705 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2706 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2707 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2708 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2709 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2710 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2711 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2712 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2713 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2714 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2715 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2716 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2717 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2718 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2719 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2720 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2721 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2722 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2723 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2724 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2725 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2726 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2727 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2728 
PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2729 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2730 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2731 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2732 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2733 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2734 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2735 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2737 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2738 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2739 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2741 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2742 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2743 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2744 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2745 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2746 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2747 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2748 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2749 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2750 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2751 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2752 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2753 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2754 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2755 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2756 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2757 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2758 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2759 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2760 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2761 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2762 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2763 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2764 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2765 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2766 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2767 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2768 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2769 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2770 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2771 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2772 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2773 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2774 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2775 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2777 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2778 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2779 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2780 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2781 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2782 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2783 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2784 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2785 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2786 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2787 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2788 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2789 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2790 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2791 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2792 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2793 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2794 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2795 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2796 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2797 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2798 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2799 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2800 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2801 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2802 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2803 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2804 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2805 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2806 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2807 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2808 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2809 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2811 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2812 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2813 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2815 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2816 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2817 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2819 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2820 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2821 2822 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2823 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2824 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2825 NUM_BANKS(ADDR_SURF_16_BANK)); 2826 2827 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2828 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2829 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2830 NUM_BANKS(ADDR_SURF_16_BANK)); 2831 2832 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2833 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2834 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2835 NUM_BANKS(ADDR_SURF_16_BANK)); 2836 2837 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2838 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2839 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2840 NUM_BANKS(ADDR_SURF_16_BANK)); 2841 2842 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2843 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2844 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2845 NUM_BANKS(ADDR_SURF_16_BANK)); 2846 2847 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2848 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2849 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2850 NUM_BANKS(ADDR_SURF_16_BANK)); 2851 2852 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2853 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2854 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2855 
NUM_BANKS(ADDR_SURF_16_BANK)); 2856 2857 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2858 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2859 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2860 NUM_BANKS(ADDR_SURF_16_BANK)); 2861 2862 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2863 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2864 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2865 NUM_BANKS(ADDR_SURF_16_BANK)); 2866 2867 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2868 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2869 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2870 NUM_BANKS(ADDR_SURF_16_BANK)); 2871 2872 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2873 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2874 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2875 NUM_BANKS(ADDR_SURF_16_BANK)); 2876 2877 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2878 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2879 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2880 NUM_BANKS(ADDR_SURF_16_BANK)); 2881 2882 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2883 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2884 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2885 NUM_BANKS(ADDR_SURF_8_BANK)); 2886 2887 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2888 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2889 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2890 NUM_BANKS(ADDR_SURF_4_BANK)); 2891 2892 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2893 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2894 2895 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2896 if (reg_offset != 7) 2897 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2898 2899 break; 2900 case CHIP_POLARIS10: 2901 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2902 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2903 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2904 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2905 modearray[1] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2906 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2907 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2908 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2909 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2910 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2911 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2912 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2913 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2914 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2915 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2916 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2917 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2918 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2919 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2920 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2921 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2922 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2923 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2924 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2925 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2926 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2927 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2928 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2929 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2930 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2931 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2932 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2933 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2935 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2936 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2937 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2938 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2939 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2940 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2941 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2942 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2943 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2944 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2945 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2946 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2947 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2948 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2949 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2950 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2951 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2952 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2953 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2954 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2955 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2956 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2957 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2958 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2959 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2960 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2961 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2962 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2963 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2964 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2965 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2966 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2967 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2968 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2969 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2970 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2971 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2972 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2973 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2974 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2975 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2976 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2977 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2978 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2979 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2980 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2981 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2982 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2983 modearray[21] = 
(ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2984 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2985 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2986 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2987 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2989 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2990 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2991 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2992 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2993 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2994 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2995 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2996 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2997 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2998 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2999 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3001 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3002 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3003 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3004 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3005 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3006 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3007 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3008 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3009 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3010 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3011 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3013 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3014 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3015 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3016 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3017 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3018 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3019 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3020 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3021 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3022 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3023 3024 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3025 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3026 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3027 NUM_BANKS(ADDR_SURF_16_BANK)); 3028 3029 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3030 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3031 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3032 NUM_BANKS(ADDR_SURF_16_BANK)); 3033 3034 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3035 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3036 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3037 NUM_BANKS(ADDR_SURF_16_BANK)); 3038 3039 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3040 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3041 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3042 NUM_BANKS(ADDR_SURF_16_BANK)); 3043 3044 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3045 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3046 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3047 NUM_BANKS(ADDR_SURF_16_BANK)); 3048 3049 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3050 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3051 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3052 NUM_BANKS(ADDR_SURF_16_BANK)); 3053 3054 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3055 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3056 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3057 NUM_BANKS(ADDR_SURF_16_BANK)); 3058 3059 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3060 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3061 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3062 NUM_BANKS(ADDR_SURF_16_BANK)); 3063 3064 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3065 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3066 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3067 NUM_BANKS(ADDR_SURF_16_BANK)); 3068 3069 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3070 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3071 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3072 NUM_BANKS(ADDR_SURF_16_BANK)); 3073 3074 
mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3075 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3076 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3077 NUM_BANKS(ADDR_SURF_16_BANK)); 3078 3079 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3080 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3081 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3082 NUM_BANKS(ADDR_SURF_8_BANK)); 3083 3084 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3085 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3086 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3087 NUM_BANKS(ADDR_SURF_4_BANK)); 3088 3089 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3090 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3091 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3092 NUM_BANKS(ADDR_SURF_4_BANK)); 3093 3094 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3095 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3096 3097 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3098 if (reg_offset != 7) 3099 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3100 3101 break; 3102 case CHIP_STONEY: 3103 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3104 PIPE_CONFIG(ADDR_SURF_P2) | 3105 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3106 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3107 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3108 PIPE_CONFIG(ADDR_SURF_P2) | 3109 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3110 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3111 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3112 PIPE_CONFIG(ADDR_SURF_P2) | 3113 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3114 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3115 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3116 PIPE_CONFIG(ADDR_SURF_P2) | 3117 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3118 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3119 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3120 PIPE_CONFIG(ADDR_SURF_P2) | 3121 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3122 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3123 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3124 PIPE_CONFIG(ADDR_SURF_P2) | 3125 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3126 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3127 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3128 PIPE_CONFIG(ADDR_SURF_P2) | 3129 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3130 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3131 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3132 PIPE_CONFIG(ADDR_SURF_P2)); 3133 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3134 PIPE_CONFIG(ADDR_SURF_P2) | 3135 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3137 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3138 PIPE_CONFIG(ADDR_SURF_P2) | 3139 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3141 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3142 PIPE_CONFIG(ADDR_SURF_P2) | 3143 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3145 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3146 PIPE_CONFIG(ADDR_SURF_P2) | 3147 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3149 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3150 PIPE_CONFIG(ADDR_SURF_P2) | 3151 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3153 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3154 PIPE_CONFIG(ADDR_SURF_P2) | 3155 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3156 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3157 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3158 PIPE_CONFIG(ADDR_SURF_P2) | 3159 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3161 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3162 
PIPE_CONFIG(ADDR_SURF_P2) | 3163 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3164 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3165 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3166 PIPE_CONFIG(ADDR_SURF_P2) | 3167 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3168 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3169 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3170 PIPE_CONFIG(ADDR_SURF_P2) | 3171 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3172 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3173 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3174 PIPE_CONFIG(ADDR_SURF_P2) | 3175 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3177 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3178 PIPE_CONFIG(ADDR_SURF_P2) | 3179 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3181 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3182 PIPE_CONFIG(ADDR_SURF_P2) | 3183 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3185 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3186 PIPE_CONFIG(ADDR_SURF_P2) | 3187 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3189 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3190 PIPE_CONFIG(ADDR_SURF_P2) | 3191 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3193 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3194 PIPE_CONFIG(ADDR_SURF_P2) | 3195 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3197 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3198 PIPE_CONFIG(ADDR_SURF_P2) | 3199 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3201 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3202 PIPE_CONFIG(ADDR_SURF_P2) | 3203 
MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3205 3206 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3209 NUM_BANKS(ADDR_SURF_8_BANK)); 3210 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3213 NUM_BANKS(ADDR_SURF_8_BANK)); 3214 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3215 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3216 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3217 NUM_BANKS(ADDR_SURF_8_BANK)); 3218 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3219 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3220 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3221 NUM_BANKS(ADDR_SURF_8_BANK)); 3222 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3223 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3224 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3225 NUM_BANKS(ADDR_SURF_8_BANK)); 3226 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3227 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3228 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3229 NUM_BANKS(ADDR_SURF_8_BANK)); 3230 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3231 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3232 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3233 NUM_BANKS(ADDR_SURF_8_BANK)); 3234 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3235 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3236 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3237 NUM_BANKS(ADDR_SURF_16_BANK)); 3238 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3239 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3240 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3241 NUM_BANKS(ADDR_SURF_16_BANK)); 3242 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3243 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3244 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3245 NUM_BANKS(ADDR_SURF_16_BANK)); 3246 
mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3247 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3248 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3249 NUM_BANKS(ADDR_SURF_16_BANK)); 3250 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3251 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3252 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3253 NUM_BANKS(ADDR_SURF_16_BANK)); 3254 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3257 NUM_BANKS(ADDR_SURF_16_BANK)); 3258 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3261 NUM_BANKS(ADDR_SURF_8_BANK)); 3262 3263 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3264 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3265 reg_offset != 23) 3266 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3267 3268 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3269 if (reg_offset != 7) 3270 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3271 3272 break; 3273 default: 3274 dev_warn(adev->dev, 3275 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3276 adev->asic_type); 3277 3278 case CHIP_CARRIZO: 3279 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3280 PIPE_CONFIG(ADDR_SURF_P2) | 3281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3282 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3283 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3284 PIPE_CONFIG(ADDR_SURF_P2) | 3285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3287 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3288 PIPE_CONFIG(ADDR_SURF_P2) | 3289 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3291 modearray[3] = 
(ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3292 PIPE_CONFIG(ADDR_SURF_P2) | 3293 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3294 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3295 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3296 PIPE_CONFIG(ADDR_SURF_P2) | 3297 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3298 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3299 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3300 PIPE_CONFIG(ADDR_SURF_P2) | 3301 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3303 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3304 PIPE_CONFIG(ADDR_SURF_P2) | 3305 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3306 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3307 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3308 PIPE_CONFIG(ADDR_SURF_P2)); 3309 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3310 PIPE_CONFIG(ADDR_SURF_P2) | 3311 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3313 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3314 PIPE_CONFIG(ADDR_SURF_P2) | 3315 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3317 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3318 PIPE_CONFIG(ADDR_SURF_P2) | 3319 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3321 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3322 PIPE_CONFIG(ADDR_SURF_P2) | 3323 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3325 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3326 PIPE_CONFIG(ADDR_SURF_P2) | 3327 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3329 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3330 PIPE_CONFIG(ADDR_SURF_P2) | 3331 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3333 
modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3334 PIPE_CONFIG(ADDR_SURF_P2) | 3335 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3337 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3338 PIPE_CONFIG(ADDR_SURF_P2) | 3339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3341 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3342 PIPE_CONFIG(ADDR_SURF_P2) | 3343 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3345 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3346 PIPE_CONFIG(ADDR_SURF_P2) | 3347 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3349 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3350 PIPE_CONFIG(ADDR_SURF_P2) | 3351 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3353 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3354 PIPE_CONFIG(ADDR_SURF_P2) | 3355 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3357 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3358 PIPE_CONFIG(ADDR_SURF_P2) | 3359 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3361 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3362 PIPE_CONFIG(ADDR_SURF_P2) | 3363 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3365 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3366 PIPE_CONFIG(ADDR_SURF_P2) | 3367 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3369 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3370 PIPE_CONFIG(ADDR_SURF_P2) | 3371 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3373 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3374 
PIPE_CONFIG(ADDR_SURF_P2) | 3375 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3377 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3378 PIPE_CONFIG(ADDR_SURF_P2) | 3379 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3381 3382 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3383 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3384 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3385 NUM_BANKS(ADDR_SURF_8_BANK)); 3386 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3389 NUM_BANKS(ADDR_SURF_8_BANK)); 3390 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3393 NUM_BANKS(ADDR_SURF_8_BANK)); 3394 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3397 NUM_BANKS(ADDR_SURF_8_BANK)); 3398 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3401 NUM_BANKS(ADDR_SURF_8_BANK)); 3402 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3405 NUM_BANKS(ADDR_SURF_8_BANK)); 3406 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3409 NUM_BANKS(ADDR_SURF_8_BANK)); 3410 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3413 NUM_BANKS(ADDR_SURF_16_BANK)); 3414 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3417 
NUM_BANKS(ADDR_SURF_16_BANK)); 3418 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3419 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3420 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3421 NUM_BANKS(ADDR_SURF_16_BANK)); 3422 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3423 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3424 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3425 NUM_BANKS(ADDR_SURF_16_BANK)); 3426 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3427 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3428 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3429 NUM_BANKS(ADDR_SURF_16_BANK)); 3430 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3431 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3432 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3433 NUM_BANKS(ADDR_SURF_16_BANK)); 3434 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3435 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3436 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3437 NUM_BANKS(ADDR_SURF_8_BANK)); 3438 3439 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3440 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3441 reg_offset != 23) 3442 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3443 3444 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3445 if (reg_offset != 7) 3446 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3447 3448 break; 3449 } 3450 } 3451 3452 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3453 u32 se_num, u32 sh_num, u32 instance) 3454 { 3455 u32 data; 3456 3457 if (instance == 0xffffffff) 3458 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3459 else 3460 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3461 3462 if (se_num == 0xffffffff) 3463 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3464 else 3465 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3466 3467 if (sh_num == 0xffffffff) 3468 data 
= REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
	else
		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

	WREG32(mmGRBM_GFX_INDEX, data);
}

/*
 * Return a mask with the @bit_width low-order bits set.
 * The 1ULL intermediate keeps the shift defined when bit_width == 32.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	return (u32)((1ULL << bit_width) - 1);
}

/*
 * Read the bitmap of render backends that are active (i.e. not disabled by
 * fuses or by user config) for the SE/SH currently selected via
 * GRBM_GFX_INDEX.  The result is limited to the RBs belonging to one SH.
 */
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	/* A backend is off if disabled in either the fuse or user register. */
	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
		RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

	mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
				       adev->gfx.config.max_sh_per_se);

	/* Invert "disabled" to get "active", clipped to one SH's RBs. */
	return (~data) & mask;
}

/*
 * Provide the per-ASIC golden PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1
 * field values.  The values are ORed into *rconf / *rconf1, which callers
 * are expected to pass in zero-initialized.
 */
static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
	switch (adev->asic_type) {
	case CHIP_FIJI:
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
			   SE_PAIR_YSEL(2);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		*rconf |= RB_MAP_PKR0(2);
		*rconf1 |= 0x0;
		break;
	case CHIP_POLARIS11:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= 0x0;
		break;
	case CHIP_STONEY:
		*rconf |= 0x0;
		*rconf1 |= 0x0;
		break;
	default:
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
		break;
	}
}

/*
 * Program PA_SC_RASTER_CONFIG(_1) per shader engine for a part with
 * harvested (disabled) render backends: the SE/PKR/RB map fields are
 * remapped so that only backends present in @rb_mask are referenced.
 *
 * @raster_config/@raster_config_1: golden (unharvested) register values
 * @rb_mask: bitmap of active RBs across the whole chip
 * @num_rb: total number of RBs the chip was designed with
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the global RB bitmap into one mask per shader engine. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If a whole SE pair is dead, point SE_PAIR_MAP at the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;

		/* If one SE of this pair is dead, remap SE_MAP to the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* Same idea one level down: remap PKR_MAP if a packer is dead. */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			/* Remap RB_MAP_PKR0 if one of packer 0's RBs is dead. */
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				/* And likewise for packer 1's RB pair. */
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}

/*
 * Detect the chip's active render backends and program the raster
 * configuration: a single broadcast write when all RBs are present,
 * otherwise the per-SE harvested path.  Caches the active-RB mask and
 * count in adev->gfx.config.  Takes grbm_idx_mutex around the
 * GRBM_GFX_INDEX manipulation.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Gather each SE/SH's active-RB bits into one chip-wide bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	/* Restore broadcast mode before the raster config writes below. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* Nothing harvested (or nothing detected): broadcast the
		 * golden values to all SEs at once. */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	mutex_unlock(&adev->grbm_idx_mutex);
}

/**
 * gfx_v8_0_init_compute_vmid - initialize SH_MEM registers for compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Program SH_MEM_CONFIG/SH_MEM_BASES for the VMIDs reserved for compute
 * (FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1) under the srbm_mutex.
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* NOTE(review): base (1) > limit (0) — APE1 aperture appears
		 * intentionally unused; confirm against the SH_MEM spec. */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Back to VMID 0. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}

/*
 * One-time GFX block initialization: address config, tiling tables, RB
 * setup, CU info, per-VMID SH_MEM programming, compute VMID apertures and
 * the PA_SC FIFO sizes.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 (kernel/CP) uses uncached default mtype. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* User VMIDs get non-coherent default mtype. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}

/*
 * Poll (up to adev->usec_timeout microseconds per register) until the RLC
 * serdes masters report idle: first the per-CU master for every SE/SH,
 * then the non-CU masters (SE/GC/TC0/TC1).
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

/*
 * Enable or disable the GUI-idle related interrupt sources on the gfx
 * ring's CP interrupt control register.
 */
static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
					       bool enable)
{
	u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
	tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ?
1 : 0);

	WREG32(mmCP_INT_CNTL_RING0, tmp);
}

/*
 * Point the RLC at the clear-state indirect buffer (CSIB): GPU address
 * split into HI/LO (LO masked to dword alignment) plus its length.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}

/*
 * Walk the RLC indirect register list starting at @ind_offset, recording
 * the start offset of each entry (entries are terminated by 0xFFFFFFFF)
 * into @ind_start_offsets, collecting the distinct values found two words
 * past each entry header into @unique_indices, and rewriting those values
 * in-place with their small index into the unique table.
 *
 * NOTE(review): both BUG_ON checks fire after the counter has already been
 * incremented, so the last array slot is never usable; harmless as long as
 * the caller's arrays are sized with slack.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* Remember where this entry begins. */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			/* Entry terminator: the next word starts a new entry. */
			new_entry = true;
			continue;
		}

		ind_offset += 2;

		/* look for a matching index seen earlier */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		if (indices >= *indices_count) {
			/* First occurrence: append to the unique table. */
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* Replace the raw value with its index into the table. */
		register_list_format[ind_offset] = indices;
	}
}

/*
 * Upload the RLC save/restore lists: the direct register-restore list into
 * SRM ARAM, the (index-compressed) indirect format list and starting
 * offsets into GPM scratch, and the unique index table into the
 * RLC_SRM_INDEX_CNTL_ADDR/DATA register pairs.
 *
 * Returns 0 on success, -ENOMEM if the temporary copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* Work on a copy: parsing rewrites the list in place. */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (register_list_format == NULL)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
		adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* restore list size is stored in dword pairs */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices: low 18 bits to the ADDR regs, high bits to DATA */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
		amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
	}
	kfree(register_list_format);

	return 0;
}

/* Turn on the RLC save/restore machine (SRM). */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

/*
 * Program the power-gating delay/threshold registers; only effective when
 * one of the GFX power-gating feature flags is set.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG)) {
		WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

		data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
		data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
		WREG32(mmRLC_PG_DELAY, data);

		WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
		WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
	}
}

/* Toggle SMU clock slow-down on power-up (Carrizo/Stoney PG feature). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

/* Toggle SMU clock slow-down on power-down (Carrizo/Stoney PG feature). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

/*
 * Toggle CP power gating.  Note the field is CP_PG_DISABLE, so
 * enable == true clears... no — enable maps directly onto the DISABLE
 * bit; see the register spec for the inverted sense.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	/* NOTE(review): 'enable' writes 1 into a *_DISABLE field — the
	 * polarity looks inverted; confirm against the RLC_PG_CNTL spec. */
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 1 : 0);
}

/*
 * Power-gating init: upload CSB and save/restore lists, enable the SRM,
 * then apply the APU (Carrizo/Stoney) or Polaris11 specific PG setup.
 *
 * NOTE(review): the -ENOMEM return of gfx_v8_0_init_save_restore_list()
 * is ignored here — PG setup proceeds even if the list upload failed.
 */
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
			      AMD_PG_SUPPORT_GFX_SMG |
			      AMD_PG_SUPPORT_GFX_DMG |
			      AMD_PG_SUPPORT_CP |
			      AMD_PG_SUPPORT_GDS |
			      AMD_PG_SUPPORT_RLC_SMU_HS)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);

		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_STONEY)) {
			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
			gfx_v8_0_init_power_gating(adev);
			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
				cz_enable_sck_slow_down_on_power_up(adev, true);
				cz_enable_sck_slow_down_on_power_down(adev, true);
			} else {
				cz_enable_sck_slow_down_on_power_up(adev, false);
				cz_enable_sck_slow_down_on_power_down(adev, false);
			}
			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
				cz_enable_cp_power_gating(adev, true);
			else
				cz_enable_cp_power_gating(adev, false);
		} else if (adev->asic_type == CHIP_POLARIS11) {
			gfx_v8_0_init_power_gating(adev);
		}
	}
}

/*
 * Halt the RLC F32 core, mask the GUI-idle interrupts and wait for the
 * serdes masters to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}

/* Pulse the RLC soft-reset bit with 50us settle time on each edge. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}

/*
 * Start the RLC F32 core.  On discrete parts the GUI-idle interrupt is
 * re-enabled here; APUs enable it later, after CP init.
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

udelay(50);
}

/*
 * Legacy (non-SMU) RLC microcode upload: stream the firmware image into
 * RLC_GPM_UCODE_DATA word by word, then latch the firmware version into
 * the address register.  Returns -EINVAL if no RLC firmware is loaded.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

/*
 * Full RLC restart: stop the core, disable CGCG/CGLS (and the 3D variants
 * on Polaris), disable PG, soft-reset, re-run PG init, load microcode
 * (legacy path or verify the SMU already loaded it), then start the core.
 *
 * Returns 0 on success or a negative error from the microcode load/check.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10) {
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		/* clear the low two (CGCG/CGLS 3D enable) bits */
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU owns the upload; just confirm it finished. */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

/*
 * Halt or un-halt the three gfx CP micro-engines (ME, PFP, CE).  When
 * halting, every gfx ring is marked not-ready so no further submissions
 * are attempted.
 */
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}

/*
 * Legacy (non-SMU) upload of the three gfx CP firmwares (PFP, CE, ME):
 * halts the CP first, then streams each image into its UCODE/RAM data
 * register and latches the firmware version.  Returns -EINVAL when any
 * of the three firmware images is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* halt the CP while its microcode is being replaced */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

/*
 * Compute the dword size of the clear-state buffer emitted by
 * gfx_v8_0_cp_gfx_start(): fixed preamble/context-control/raster-config/
 * postamble packets plus one SET_CONTEXT_REG packet per extent of the
 * SECT_CONTEXT sections in vi_cs_data.  Returns 0 if a non-context
 * section is encountered.
 */
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

/*
 * Bring up the gfx CP and emit the initial clear-state sequence on ring 0:
 * context control, the vi_cs_data context registers, the per-ASIC
 * PA_SC_RASTER_CONFIG values, a CLEAR_STATE packet and the CE partition
 * bases.  Returns 0 on success or the ring-allocation error.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1 values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4301 0x00000000 : 0x00000002); 4302 amdgpu_ring_write(ring, 0x00000000); 4303 break; 4304 case CHIP_STONEY: 4305 amdgpu_ring_write(ring, 0x00000000); 4306 amdgpu_ring_write(ring, 0x00000000); 4307 break; 4308 default: 4309 BUG(); 4310 } 4311 4312 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4313 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4314 4315 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4316 amdgpu_ring_write(ring, 0); 4317 4318 /* init the CE partitions */ 4319 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4320 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4321 amdgpu_ring_write(ring, 0x8000); 4322 amdgpu_ring_write(ring, 0x8000); 4323 4324 amdgpu_ring_commit(ring); 4325 4326 return 0; 4327 } 4328 4329 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) 4330 { 4331 struct amdgpu_ring *ring; 4332 u32 tmp; 4333 u32 rb_bufsz; 4334 u64 rb_addr, rptr_addr; 4335 int r; 4336 4337 /* Set the write pointer delay */ 4338 WREG32(mmCP_RB_WPTR_DELAY, 0); 4339 4340 /* set the RB to use vmid 0 */ 4341 WREG32(mmCP_RB_VMID, 0); 4342 4343 /* Set ring buffer size */ 4344 ring = &adev->gfx.gfx_ring[0]; 4345 rb_bufsz = order_base_2(ring->ring_size / 8); 4346 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 4347 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 4348 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3); 4349 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1); 4350 #ifdef __BIG_ENDIAN 4351 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 4352 #endif 4353 WREG32(mmCP_RB0_CNTL, tmp); 4354 4355 /* Initialize the ring buffer's read and write pointers */ 4356 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); 4357 ring->wptr = 0; 4358 WREG32(mmCP_RB0_WPTR, ring->wptr); 4359 4360 /* set the wb address wether it's enabled or not */ 4361 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4362 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 4363 
WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); 4364 4365 mdelay(1); 4366 WREG32(mmCP_RB0_CNTL, tmp); 4367 4368 rb_addr = ring->gpu_addr >> 8; 4369 WREG32(mmCP_RB0_BASE, rb_addr); 4370 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4371 4372 /* no gfx doorbells on iceland */ 4373 if (adev->asic_type != CHIP_TOPAZ) { 4374 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); 4375 if (ring->use_doorbell) { 4376 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4377 DOORBELL_OFFSET, ring->doorbell_index); 4378 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4379 DOORBELL_HIT, 0); 4380 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4381 DOORBELL_EN, 1); 4382 } else { 4383 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4384 DOORBELL_EN, 0); 4385 } 4386 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); 4387 4388 if (adev->asic_type == CHIP_TONGA) { 4389 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 4390 DOORBELL_RANGE_LOWER, 4391 AMDGPU_DOORBELL_GFX_RING0); 4392 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 4393 4394 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, 4395 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 4396 } 4397 4398 } 4399 4400 /* start the ring */ 4401 gfx_v8_0_cp_gfx_start(adev); 4402 ring->ready = true; 4403 r = amdgpu_ring_test_ring(ring); 4404 if (r) 4405 ring->ready = false; 4406 4407 return r; 4408 } 4409 4410 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4411 { 4412 int i; 4413 4414 if (enable) { 4415 WREG32(mmCP_MEC_CNTL, 0); 4416 } else { 4417 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4418 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4419 adev->gfx.compute_ring[i].ready = false; 4420 } 4421 udelay(50); 4422 } 4423 4424 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4425 { 4426 const struct gfx_firmware_header_v1_0 *mec_hdr; 4427 const __le32 *fw_data; 4428 unsigned i, fw_size; 4429 4430 if (!adev->gfx.mec_fw) 4431 return -EINVAL; 4432 
4433 gfx_v8_0_cp_compute_enable(adev, false); 4434 4435 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4436 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4437 4438 fw_data = (const __le32 *) 4439 (adev->gfx.mec_fw->data + 4440 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4441 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4442 4443 /* MEC1 */ 4444 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4445 for (i = 0; i < fw_size; i++) 4446 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4447 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4448 4449 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4450 if (adev->gfx.mec2_fw) { 4451 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4452 4453 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4454 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4455 4456 fw_data = (const __le32 *) 4457 (adev->gfx.mec2_fw->data + 4458 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4459 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4460 4461 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4462 for (i = 0; i < fw_size; i++) 4463 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4464 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4465 } 4466 4467 return 0; 4468 } 4469 4470 struct vi_mqd { 4471 uint32_t header; /* ordinal0 */ 4472 uint32_t compute_dispatch_initiator; /* ordinal1 */ 4473 uint32_t compute_dim_x; /* ordinal2 */ 4474 uint32_t compute_dim_y; /* ordinal3 */ 4475 uint32_t compute_dim_z; /* ordinal4 */ 4476 uint32_t compute_start_x; /* ordinal5 */ 4477 uint32_t compute_start_y; /* ordinal6 */ 4478 uint32_t compute_start_z; /* ordinal7 */ 4479 uint32_t compute_num_thread_x; /* ordinal8 */ 4480 uint32_t compute_num_thread_y; /* ordinal9 */ 4481 uint32_t compute_num_thread_z; /* ordinal10 */ 4482 uint32_t compute_pipelinestat_enable; /* ordinal11 */ 4483 uint32_t 
compute_perfcount_enable; /* ordinal12 */ 4484 uint32_t compute_pgm_lo; /* ordinal13 */ 4485 uint32_t compute_pgm_hi; /* ordinal14 */ 4486 uint32_t compute_tba_lo; /* ordinal15 */ 4487 uint32_t compute_tba_hi; /* ordinal16 */ 4488 uint32_t compute_tma_lo; /* ordinal17 */ 4489 uint32_t compute_tma_hi; /* ordinal18 */ 4490 uint32_t compute_pgm_rsrc1; /* ordinal19 */ 4491 uint32_t compute_pgm_rsrc2; /* ordinal20 */ 4492 uint32_t compute_vmid; /* ordinal21 */ 4493 uint32_t compute_resource_limits; /* ordinal22 */ 4494 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */ 4495 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */ 4496 uint32_t compute_tmpring_size; /* ordinal25 */ 4497 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */ 4498 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */ 4499 uint32_t compute_restart_x; /* ordinal28 */ 4500 uint32_t compute_restart_y; /* ordinal29 */ 4501 uint32_t compute_restart_z; /* ordinal30 */ 4502 uint32_t compute_thread_trace_enable; /* ordinal31 */ 4503 uint32_t compute_misc_reserved; /* ordinal32 */ 4504 uint32_t compute_dispatch_id; /* ordinal33 */ 4505 uint32_t compute_threadgroup_id; /* ordinal34 */ 4506 uint32_t compute_relaunch; /* ordinal35 */ 4507 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */ 4508 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */ 4509 uint32_t compute_wave_restore_control; /* ordinal38 */ 4510 uint32_t reserved9; /* ordinal39 */ 4511 uint32_t reserved10; /* ordinal40 */ 4512 uint32_t reserved11; /* ordinal41 */ 4513 uint32_t reserved12; /* ordinal42 */ 4514 uint32_t reserved13; /* ordinal43 */ 4515 uint32_t reserved14; /* ordinal44 */ 4516 uint32_t reserved15; /* ordinal45 */ 4517 uint32_t reserved16; /* ordinal46 */ 4518 uint32_t reserved17; /* ordinal47 */ 4519 uint32_t reserved18; /* ordinal48 */ 4520 uint32_t reserved19; /* ordinal49 */ 4521 uint32_t reserved20; /* ordinal50 */ 4522 uint32_t reserved21; /* ordinal51 */ 4523 uint32_t reserved22; /* ordinal52 */ 4524 
uint32_t reserved23; /* ordinal53 */ 4525 uint32_t reserved24; /* ordinal54 */ 4526 uint32_t reserved25; /* ordinal55 */ 4527 uint32_t reserved26; /* ordinal56 */ 4528 uint32_t reserved27; /* ordinal57 */ 4529 uint32_t reserved28; /* ordinal58 */ 4530 uint32_t reserved29; /* ordinal59 */ 4531 uint32_t reserved30; /* ordinal60 */ 4532 uint32_t reserved31; /* ordinal61 */ 4533 uint32_t reserved32; /* ordinal62 */ 4534 uint32_t reserved33; /* ordinal63 */ 4535 uint32_t reserved34; /* ordinal64 */ 4536 uint32_t compute_user_data_0; /* ordinal65 */ 4537 uint32_t compute_user_data_1; /* ordinal66 */ 4538 uint32_t compute_user_data_2; /* ordinal67 */ 4539 uint32_t compute_user_data_3; /* ordinal68 */ 4540 uint32_t compute_user_data_4; /* ordinal69 */ 4541 uint32_t compute_user_data_5; /* ordinal70 */ 4542 uint32_t compute_user_data_6; /* ordinal71 */ 4543 uint32_t compute_user_data_7; /* ordinal72 */ 4544 uint32_t compute_user_data_8; /* ordinal73 */ 4545 uint32_t compute_user_data_9; /* ordinal74 */ 4546 uint32_t compute_user_data_10; /* ordinal75 */ 4547 uint32_t compute_user_data_11; /* ordinal76 */ 4548 uint32_t compute_user_data_12; /* ordinal77 */ 4549 uint32_t compute_user_data_13; /* ordinal78 */ 4550 uint32_t compute_user_data_14; /* ordinal79 */ 4551 uint32_t compute_user_data_15; /* ordinal80 */ 4552 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */ 4553 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */ 4554 uint32_t reserved35; /* ordinal83 */ 4555 uint32_t reserved36; /* ordinal84 */ 4556 uint32_t reserved37; /* ordinal85 */ 4557 uint32_t cp_mqd_query_time_lo; /* ordinal86 */ 4558 uint32_t cp_mqd_query_time_hi; /* ordinal87 */ 4559 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */ 4560 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */ 4561 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */ 4562 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */ 4563 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */ 4564 uint32_t 
cp_mqd_connect_end_pq_rptr; /* ordinal93 */ 4565 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */ 4566 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */ 4567 uint32_t reserved38; /* ordinal96 */ 4568 uint32_t reserved39; /* ordinal97 */ 4569 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */ 4570 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */ 4571 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */ 4572 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */ 4573 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */ 4574 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */ 4575 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */ 4576 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */ 4577 uint32_t reserved40; /* ordinal106 */ 4578 uint32_t reserved41; /* ordinal107 */ 4579 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */ 4580 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */ 4581 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */ 4582 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */ 4583 uint32_t reserved42; /* ordinal112 */ 4584 uint32_t reserved43; /* ordinal113 */ 4585 uint32_t cp_pq_exe_status_lo; /* ordinal114 */ 4586 uint32_t cp_pq_exe_status_hi; /* ordinal115 */ 4587 uint32_t cp_packet_id_lo; /* ordinal116 */ 4588 uint32_t cp_packet_id_hi; /* ordinal117 */ 4589 uint32_t cp_packet_exe_status_lo; /* ordinal118 */ 4590 uint32_t cp_packet_exe_status_hi; /* ordinal119 */ 4591 uint32_t gds_save_base_addr_lo; /* ordinal120 */ 4592 uint32_t gds_save_base_addr_hi; /* ordinal121 */ 4593 uint32_t gds_save_mask_lo; /* ordinal122 */ 4594 uint32_t gds_save_mask_hi; /* ordinal123 */ 4595 uint32_t ctx_save_base_addr_lo; /* ordinal124 */ 4596 uint32_t ctx_save_base_addr_hi; /* ordinal125 */ 4597 uint32_t reserved44; /* ordinal126 */ 4598 uint32_t reserved45; /* ordinal127 */ 4599 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */ 4600 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */ 4601 uint32_t cp_hqd_active; /* ordinal130 */ 4602 uint32_t cp_hqd_vmid; /* ordinal131 */ 4603 
uint32_t cp_hqd_persistent_state; /* ordinal132 */ 4604 uint32_t cp_hqd_pipe_priority; /* ordinal133 */ 4605 uint32_t cp_hqd_queue_priority; /* ordinal134 */ 4606 uint32_t cp_hqd_quantum; /* ordinal135 */ 4607 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */ 4608 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */ 4609 uint32_t cp_hqd_pq_rptr; /* ordinal138 */ 4610 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */ 4611 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */ 4612 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */ 4613 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */ 4614 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */ 4615 uint32_t cp_hqd_pq_wptr; /* ordinal144 */ 4616 uint32_t cp_hqd_pq_control; /* ordinal145 */ 4617 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */ 4618 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */ 4619 uint32_t cp_hqd_ib_rptr; /* ordinal148 */ 4620 uint32_t cp_hqd_ib_control; /* ordinal149 */ 4621 uint32_t cp_hqd_iq_timer; /* ordinal150 */ 4622 uint32_t cp_hqd_iq_rptr; /* ordinal151 */ 4623 uint32_t cp_hqd_dequeue_request; /* ordinal152 */ 4624 uint32_t cp_hqd_dma_offload; /* ordinal153 */ 4625 uint32_t cp_hqd_sema_cmd; /* ordinal154 */ 4626 uint32_t cp_hqd_msg_type; /* ordinal155 */ 4627 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */ 4628 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */ 4629 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */ 4630 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */ 4631 uint32_t cp_hqd_hq_status0; /* ordinal160 */ 4632 uint32_t cp_hqd_hq_control0; /* ordinal161 */ 4633 uint32_t cp_mqd_control; /* ordinal162 */ 4634 uint32_t cp_hqd_hq_status1; /* ordinal163 */ 4635 uint32_t cp_hqd_hq_control1; /* ordinal164 */ 4636 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */ 4637 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */ 4638 uint32_t cp_hqd_eop_control; /* ordinal167 */ 4639 uint32_t cp_hqd_eop_rptr; /* ordinal168 */ 4640 uint32_t cp_hqd_eop_wptr; /* ordinal169 */ 4641 uint32_t 
cp_hqd_eop_done_events; /* ordinal170 */ 4642 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */ 4643 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */ 4644 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */ 4645 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */ 4646 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */ 4647 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */ 4648 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */ 4649 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */ 4650 uint32_t cp_hqd_error; /* ordinal179 */ 4651 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */ 4652 uint32_t cp_hqd_eop_dones; /* ordinal181 */ 4653 uint32_t reserved46; /* ordinal182 */ 4654 uint32_t reserved47; /* ordinal183 */ 4655 uint32_t reserved48; /* ordinal184 */ 4656 uint32_t reserved49; /* ordinal185 */ 4657 uint32_t reserved50; /* ordinal186 */ 4658 uint32_t reserved51; /* ordinal187 */ 4659 uint32_t reserved52; /* ordinal188 */ 4660 uint32_t reserved53; /* ordinal189 */ 4661 uint32_t reserved54; /* ordinal190 */ 4662 uint32_t reserved55; /* ordinal191 */ 4663 uint32_t iqtimer_pkt_header; /* ordinal192 */ 4664 uint32_t iqtimer_pkt_dw0; /* ordinal193 */ 4665 uint32_t iqtimer_pkt_dw1; /* ordinal194 */ 4666 uint32_t iqtimer_pkt_dw2; /* ordinal195 */ 4667 uint32_t iqtimer_pkt_dw3; /* ordinal196 */ 4668 uint32_t iqtimer_pkt_dw4; /* ordinal197 */ 4669 uint32_t iqtimer_pkt_dw5; /* ordinal198 */ 4670 uint32_t iqtimer_pkt_dw6; /* ordinal199 */ 4671 uint32_t iqtimer_pkt_dw7; /* ordinal200 */ 4672 uint32_t iqtimer_pkt_dw8; /* ordinal201 */ 4673 uint32_t iqtimer_pkt_dw9; /* ordinal202 */ 4674 uint32_t iqtimer_pkt_dw10; /* ordinal203 */ 4675 uint32_t iqtimer_pkt_dw11; /* ordinal204 */ 4676 uint32_t iqtimer_pkt_dw12; /* ordinal205 */ 4677 uint32_t iqtimer_pkt_dw13; /* ordinal206 */ 4678 uint32_t iqtimer_pkt_dw14; /* ordinal207 */ 4679 uint32_t iqtimer_pkt_dw15; /* ordinal208 */ 4680 uint32_t iqtimer_pkt_dw16; /* ordinal209 */ 4681 uint32_t iqtimer_pkt_dw17; /* ordinal210 */ 
4682 uint32_t iqtimer_pkt_dw18; /* ordinal211 */ 4683 uint32_t iqtimer_pkt_dw19; /* ordinal212 */ 4684 uint32_t iqtimer_pkt_dw20; /* ordinal213 */ 4685 uint32_t iqtimer_pkt_dw21; /* ordinal214 */ 4686 uint32_t iqtimer_pkt_dw22; /* ordinal215 */ 4687 uint32_t iqtimer_pkt_dw23; /* ordinal216 */ 4688 uint32_t iqtimer_pkt_dw24; /* ordinal217 */ 4689 uint32_t iqtimer_pkt_dw25; /* ordinal218 */ 4690 uint32_t iqtimer_pkt_dw26; /* ordinal219 */ 4691 uint32_t iqtimer_pkt_dw27; /* ordinal220 */ 4692 uint32_t iqtimer_pkt_dw28; /* ordinal221 */ 4693 uint32_t iqtimer_pkt_dw29; /* ordinal222 */ 4694 uint32_t iqtimer_pkt_dw30; /* ordinal223 */ 4695 uint32_t iqtimer_pkt_dw31; /* ordinal224 */ 4696 uint32_t reserved56; /* ordinal225 */ 4697 uint32_t reserved57; /* ordinal226 */ 4698 uint32_t reserved58; /* ordinal227 */ 4699 uint32_t set_resources_header; /* ordinal228 */ 4700 uint32_t set_resources_dw1; /* ordinal229 */ 4701 uint32_t set_resources_dw2; /* ordinal230 */ 4702 uint32_t set_resources_dw3; /* ordinal231 */ 4703 uint32_t set_resources_dw4; /* ordinal232 */ 4704 uint32_t set_resources_dw5; /* ordinal233 */ 4705 uint32_t set_resources_dw6; /* ordinal234 */ 4706 uint32_t set_resources_dw7; /* ordinal235 */ 4707 uint32_t reserved59; /* ordinal236 */ 4708 uint32_t reserved60; /* ordinal237 */ 4709 uint32_t reserved61; /* ordinal238 */ 4710 uint32_t reserved62; /* ordinal239 */ 4711 uint32_t reserved63; /* ordinal240 */ 4712 uint32_t reserved64; /* ordinal241 */ 4713 uint32_t reserved65; /* ordinal242 */ 4714 uint32_t reserved66; /* ordinal243 */ 4715 uint32_t reserved67; /* ordinal244 */ 4716 uint32_t reserved68; /* ordinal245 */ 4717 uint32_t reserved69; /* ordinal246 */ 4718 uint32_t reserved70; /* ordinal247 */ 4719 uint32_t reserved71; /* ordinal248 */ 4720 uint32_t reserved72; /* ordinal249 */ 4721 uint32_t reserved73; /* ordinal250 */ 4722 uint32_t reserved74; /* ordinal251 */ 4723 uint32_t reserved75; /* ordinal252 */ 4724 uint32_t reserved76; /* ordinal253 */ 4725 
uint32_t reserved77; /* ordinal254 */ 4726 uint32_t reserved78; /* ordinal255 */ 4727 4728 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */ 4729 }; 4730 4731 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) 4732 { 4733 int i, r; 4734 4735 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4736 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4737 4738 if (ring->mqd_obj) { 4739 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4740 if (unlikely(r != 0)) 4741 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); 4742 4743 amdgpu_bo_unpin(ring->mqd_obj); 4744 amdgpu_bo_unreserve(ring->mqd_obj); 4745 4746 amdgpu_bo_unref(&ring->mqd_obj); 4747 ring->mqd_obj = NULL; 4748 } 4749 } 4750 } 4751 4752 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) 4753 { 4754 int r, i, j; 4755 u32 tmp; 4756 bool use_doorbell = true; 4757 u64 hqd_gpu_addr; 4758 u64 mqd_gpu_addr; 4759 u64 eop_gpu_addr; 4760 u64 wb_gpu_addr; 4761 u32 *buf; 4762 struct vi_mqd *mqd; 4763 4764 /* init the pipes */ 4765 mutex_lock(&adev->srbm_mutex); 4766 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { 4767 int me = (i < 4) ? 1 : 2; 4768 int pipe = (i < 4) ? i : (i - 4); 4769 4770 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); 4771 eop_gpu_addr >>= 8; 4772 4773 vi_srbm_select(adev, me, pipe, 0, 0); 4774 4775 /* write the EOP addr */ 4776 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); 4777 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); 4778 4779 /* set the VMID assigned */ 4780 WREG32(mmCP_HQD_VMID, 0); 4781 4782 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4783 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4784 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4785 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 4786 WREG32(mmCP_HQD_EOP_CONTROL, tmp); 4787 } 4788 vi_srbm_select(adev, 0, 0, 0, 0); 4789 mutex_unlock(&adev->srbm_mutex); 4790 4791 /* init the queues. Just two for now. 
*/ 4792 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4793 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4794 4795 if (ring->mqd_obj == NULL) { 4796 r = amdgpu_bo_create(adev, 4797 sizeof(struct vi_mqd), 4798 PAGE_SIZE, true, 4799 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, 4800 NULL, &ring->mqd_obj); 4801 if (r) { 4802 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); 4803 return r; 4804 } 4805 } 4806 4807 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4808 if (unlikely(r != 0)) { 4809 gfx_v8_0_cp_compute_fini(adev); 4810 return r; 4811 } 4812 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, 4813 &mqd_gpu_addr); 4814 if (r) { 4815 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); 4816 gfx_v8_0_cp_compute_fini(adev); 4817 return r; 4818 } 4819 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); 4820 if (r) { 4821 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); 4822 gfx_v8_0_cp_compute_fini(adev); 4823 return r; 4824 } 4825 4826 /* init the mqd struct */ 4827 memset(buf, 0, sizeof(struct vi_mqd)); 4828 4829 mqd = (struct vi_mqd *)buf; 4830 mqd->header = 0xC0310800; 4831 mqd->compute_pipelinestat_enable = 0x00000001; 4832 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4833 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4834 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4835 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4836 mqd->compute_misc_reserved = 0x00000003; 4837 4838 mutex_lock(&adev->srbm_mutex); 4839 vi_srbm_select(adev, ring->me, 4840 ring->pipe, 4841 ring->queue, 0); 4842 4843 /* disable wptr polling */ 4844 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); 4845 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4846 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); 4847 4848 mqd->cp_hqd_eop_base_addr_lo = 4849 RREG32(mmCP_HQD_EOP_BASE_ADDR); 4850 mqd->cp_hqd_eop_base_addr_hi = 4851 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); 4852 4853 /* enable doorbell? 
*/ 4854 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4855 if (use_doorbell) { 4856 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 4857 } else { 4858 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); 4859 } 4860 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); 4861 mqd->cp_hqd_pq_doorbell_control = tmp; 4862 4863 /* disable the queue if it's active */ 4864 mqd->cp_hqd_dequeue_request = 0; 4865 mqd->cp_hqd_pq_rptr = 0; 4866 mqd->cp_hqd_pq_wptr= 0; 4867 if (RREG32(mmCP_HQD_ACTIVE) & 1) { 4868 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); 4869 for (j = 0; j < adev->usec_timeout; j++) { 4870 if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) 4871 break; 4872 udelay(1); 4873 } 4874 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); 4875 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); 4876 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4877 } 4878 4879 /* set the pointer to the MQD */ 4880 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; 4881 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 4882 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 4883 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 4884 4885 /* set MQD vmid to 0 */ 4886 tmp = RREG32(mmCP_MQD_CONTROL); 4887 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4888 WREG32(mmCP_MQD_CONTROL, tmp); 4889 mqd->cp_mqd_control = tmp; 4890 4891 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4892 hqd_gpu_addr = ring->gpu_addr >> 8; 4893 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4894 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4895 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 4896 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 4897 4898 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4899 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4900 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4901 (order_base_2(ring->ring_size / 4) - 1)); 4902 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4903 
((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4904 #ifdef __BIG_ENDIAN 4905 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4906 #endif 4907 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4908 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4909 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4910 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4911 WREG32(mmCP_HQD_PQ_CONTROL, tmp); 4912 mqd->cp_hqd_pq_control = tmp; 4913 4914 /* set the wb address wether it's enabled or not */ 4915 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4916 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4917 mqd->cp_hqd_pq_rptr_report_addr_hi = 4918 upper_32_bits(wb_gpu_addr) & 0xffff; 4919 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 4920 mqd->cp_hqd_pq_rptr_report_addr_lo); 4921 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4922 mqd->cp_hqd_pq_rptr_report_addr_hi); 4923 4924 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4925 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4926 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; 4927 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4928 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr); 4929 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4930 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4931 4932 /* enable the doorbell if requested */ 4933 if (use_doorbell) { 4934 if ((adev->asic_type == CHIP_CARRIZO) || 4935 (adev->asic_type == CHIP_FIJI) || 4936 (adev->asic_type == CHIP_STONEY) || 4937 (adev->asic_type == CHIP_POLARIS11) || 4938 (adev->asic_type == CHIP_POLARIS10)) { 4939 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 4940 AMDGPU_DOORBELL_KIQ << 2); 4941 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 4942 AMDGPU_DOORBELL_MEC_RING7 << 2); 4943 } 4944 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4945 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4946 DOORBELL_OFFSET, ring->doorbell_index); 4947 tmp = 
REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 4948 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); 4949 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); 4950 mqd->cp_hqd_pq_doorbell_control = tmp; 4951 4952 } else { 4953 mqd->cp_hqd_pq_doorbell_control = 0; 4954 } 4955 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 4956 mqd->cp_hqd_pq_doorbell_control); 4957 4958 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4959 ring->wptr = 0; 4960 mqd->cp_hqd_pq_wptr = ring->wptr; 4961 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4962 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4963 4964 /* set the vmid for the queue */ 4965 mqd->cp_hqd_vmid = 0; 4966 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); 4967 4968 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4969 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4970 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); 4971 mqd->cp_hqd_persistent_state = tmp; 4972 if (adev->asic_type == CHIP_STONEY || 4973 adev->asic_type == CHIP_POLARIS11 || 4974 adev->asic_type == CHIP_POLARIS10) { 4975 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); 4976 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); 4977 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); 4978 } 4979 4980 /* activate the queue */ 4981 mqd->cp_hqd_active = 1; 4982 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 4983 4984 vi_srbm_select(adev, 0, 0, 0, 0); 4985 mutex_unlock(&adev->srbm_mutex); 4986 4987 amdgpu_bo_kunmap(ring->mqd_obj); 4988 amdgpu_bo_unreserve(ring->mqd_obj); 4989 } 4990 4991 if (use_doorbell) { 4992 tmp = RREG32(mmCP_PQ_STATUS); 4993 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4994 WREG32(mmCP_PQ_STATUS, tmp); 4995 } 4996 4997 gfx_v8_0_cp_compute_enable(adev, true); 4998 4999 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5000 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5001 5002 ring->ready = true; 5003 r = amdgpu_ring_test_ring(ring); 5004 if (r) 5005 
ring->ready = false; 5006 } 5007 5008 return 0; 5009 } 5010 5011 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 5012 { 5013 int r; 5014 5015 if (!(adev->flags & AMD_IS_APU)) 5016 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5017 5018 if (!adev->pp_enabled) { 5019 if (!adev->firmware.smu_load) { 5020 /* legacy firmware loading */ 5021 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5022 if (r) 5023 return r; 5024 5025 r = gfx_v8_0_cp_compute_load_microcode(adev); 5026 if (r) 5027 return r; 5028 } else { 5029 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5030 AMDGPU_UCODE_ID_CP_CE); 5031 if (r) 5032 return -EINVAL; 5033 5034 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5035 AMDGPU_UCODE_ID_CP_PFP); 5036 if (r) 5037 return -EINVAL; 5038 5039 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5040 AMDGPU_UCODE_ID_CP_ME); 5041 if (r) 5042 return -EINVAL; 5043 5044 if (adev->asic_type == CHIP_TOPAZ) { 5045 r = gfx_v8_0_cp_compute_load_microcode(adev); 5046 if (r) 5047 return r; 5048 } else { 5049 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5050 AMDGPU_UCODE_ID_CP_MEC1); 5051 if (r) 5052 return -EINVAL; 5053 } 5054 } 5055 } 5056 5057 r = gfx_v8_0_cp_gfx_resume(adev); 5058 if (r) 5059 return r; 5060 5061 r = gfx_v8_0_cp_compute_resume(adev); 5062 if (r) 5063 return r; 5064 5065 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5066 5067 return 0; 5068 } 5069 5070 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5071 { 5072 gfx_v8_0_cp_gfx_enable(adev, enable); 5073 gfx_v8_0_cp_compute_enable(adev, enable); 5074 } 5075 5076 static int gfx_v8_0_hw_init(void *handle) 5077 { 5078 int r; 5079 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5080 5081 gfx_v8_0_init_golden_registers(adev); 5082 gfx_v8_0_gpu_init(adev); 5083 5084 r = gfx_v8_0_rlc_resume(adev); 5085 if (r) 5086 return r; 5087 5088 r = gfx_v8_0_cp_resume(adev); 5089 5090 return r; 5091 } 5092 5093 static int gfx_v8_0_hw_fini(void *handle) 
5094 { 5095 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5096 5097 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5098 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5099 gfx_v8_0_cp_enable(adev, false); 5100 gfx_v8_0_rlc_stop(adev); 5101 gfx_v8_0_cp_compute_fini(adev); 5102 5103 amdgpu_set_powergating_state(adev, 5104 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 5105 5106 return 0; 5107 } 5108 5109 static int gfx_v8_0_suspend(void *handle) 5110 { 5111 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5112 5113 return gfx_v8_0_hw_fini(adev); 5114 } 5115 5116 static int gfx_v8_0_resume(void *handle) 5117 { 5118 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5119 5120 return gfx_v8_0_hw_init(adev); 5121 } 5122 5123 static bool gfx_v8_0_is_idle(void *handle) 5124 { 5125 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5126 5127 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5128 return false; 5129 else 5130 return true; 5131 } 5132 5133 static int gfx_v8_0_wait_for_idle(void *handle) 5134 { 5135 unsigned i; 5136 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5137 5138 for (i = 0; i < adev->usec_timeout; i++) { 5139 if (gfx_v8_0_is_idle(handle)) 5140 return 0; 5141 5142 udelay(1); 5143 } 5144 return -ETIMEDOUT; 5145 } 5146 5147 static bool gfx_v8_0_check_soft_reset(void *handle) 5148 { 5149 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5150 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5151 u32 tmp; 5152 5153 /* GRBM_STATUS */ 5154 tmp = RREG32(mmGRBM_STATUS); 5155 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5156 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5157 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5158 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5159 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5160 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5161 
GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* Any busy GFX engine: request CP + GFX reset via GRBM and a
		 * GRBM reset via SRBM. */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* Any CP fetcher/compute/gfx micro engine busy: reset all of CPF/
	 * CPC/CPG plus GRBM. */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* Stash the masks for the pre/soft/post reset phases */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}

/*
 * gfx_v8_0_inactive_hqd - request dequeue on a compute ring's hardware
 * queue descriptor and wait (up to usec_timeout) for it to go inactive.
 * NOTE(review): leaves the ring's SRBM selection active on return —
 * callers appear to rely on a later vi_srbm_select(); confirm.
 */
static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring)
{
	int i;

	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		u32 tmp;
		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
				    DEQUEUE_REQ, 2);
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
	}
}

/*
 * gfx_v8_0_pre_soft_reset - quiesce the engines named in the masks
 * computed by gfx_v8_0_check_soft_reset before the reset is applied.
 */
static int gfx_v8_0_pre_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stop the rlc */
	gfx_v8_0_rlc_stop(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* Drain every compute HQD before touching the MEC */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_inactive_hqd(adev, ring);
		}
		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);
	}

	return 0;
}

/*
 * gfx_v8_0_soft_reset - pulse the GRBM/SRBM soft-reset bits recorded by
 * gfx_v8_0_check_soft_reset, with the GMC GFX path stalled around the
 * reset.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset =
adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* Stall and clear the GFX path in the memory hub while resetting */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* Assert, hold ~50us, then deassert the GRBM reset bits; the
		 * read-back after each write posts/flushes the register. */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* Same assert/deassert sequence for the SRBM bits */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	if (grbm_soft_reset || srbm_soft_reset) {
		/* Release the GMC stall */
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}

/*
 * gfx_v8_0_init_hqd - clear a compute queue's dequeue request and ring
 * pointers after reset, under that queue's SRBM selection.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
}

/*
 * gfx_v8_0_post_soft_reset - bring the engines that were reset back up:
 * restart GFX CP and/or the compute queues, then restart the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		/* Re-init every compute HQD before restarting the MEC */
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	gfx_v8_0_rlc_start(adev);

	return 0;
}

/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* Latch the counter, then read LSB before MSB; the mutex keeps
	 * concurrent snapshots from interleaving the two reads. */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}

/*
 * gfx_v8_0_ring_emit_gds_switch - emit WRITE_DATA packets programming
 * the per-VMID GDS base/size, GWS and OA allocations for this ring.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* Convert byte quantities into the units the registers expect */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	/* contiguous OA bitmask covering [oa_base, oa_base + oa_size) */
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}

static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
};

/* IP-block early_init: set ring counts and install the gfx/ring/irq/
 * gds/rlc function tables. */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}

/* IP-block late_init: enable privileged reg/instruction interrupts,
 * run the EDC GPR workaround (needs the IB pool), then gate GFX power. */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}

/*
 * gfx_v8_0_enable_gfx_static_mg_power_gating - toggle static per-CU
 * power gating; Polaris11 additionally notifies the SMU via powerplay.
 */
static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	if (adev->asic_type == CHIP_POLARIS11)
		/* Send msg to SMU via Powerplay */
		amdgpu_set_powergating_state(adev,
					     AMD_IP_BLOCK_TYPE_SMC,
					     enable ?
					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);

	WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ?
1 : 0);
}

/* Toggle dynamic per-CU power gating. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}

/* Toggle quick power gating (Polaris11 only). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
						       bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}

/* Toggle coarse GFX power gating (Carrizo/Stoney). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}

static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}

/* Enable CG power gating (and pipeline PG when supported), or disable
 * both. */
static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
		cz_enable_gfx_cg_power_gating(adev, true);
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
			cz_enable_gfx_pipeline_power_gating(adev, true);
	} else {
		cz_enable_gfx_cg_power_gating(adev, false);
		cz_enable_gfx_pipeline_power_gating(adev, false);
	}
}

/*
 * gfx_v8_0_set_powergating_state - IP-block set_powergating_state
 * callback; applies the ASIC-specific combination of PG features
 * advertised in adev->pg_flags.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_PG_STATE_GATE) ?
		true : false;

	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
		return 0;

	switch (adev->asic_type) {
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
			cz_update_gfx_cg_power_gating(adev, enable);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
		break;
	case CHIP_POLARIS11:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}

	return 0;
}

/*
 * gfx_v8_0_send_serdes_cmd - broadcast a BPM command to all CU and
 * non-CU serdes masters through RLC_SERDES_WR_CTRL.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* Broadcast to every SE/SH */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* Encode the command and target BPM register address */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}

/* RLC safe-mode handshake message codes and RLC_GPR_REG2 fields */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e

/*
 * cz_enter_rlc_safe_mode - ask the Carrizo-class RLC firmware for safe
 * mode via RLC_GPR_REG2 and wait for GFX clock/power status plus the
 * REQ-bit acknowledgement.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* No handshake possible when the RLC F32 core is not running */
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait for both GFX clock and power status bits to be set */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to acknowledge by clearing REQ */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

/* cz_exit_rlc_safe_mode - send the exit message and wait for REQ ack. */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to clear REQ */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ))
			break;
		udelay(1);
	}
}

/*
 * iceland_enter_rlc_safe_mode - RLC_SAFE_MODE based handshake used on
 * Iceland-class parts (only when CGCG/MGCG is supported).
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait for GFX clock and power status */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		for (i = 0; i < adev->usec_timeout; i++)
{
			/* wait for the RLC to acknowledge by clearing CMD */
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}

/* iceland_exit_rlc_safe_mode - clear the message and wait for CMD ack. */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}

/* No-op safe-mode hooks for ASICs without an RLC handshake. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}

static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}

static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};

static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};

/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable/disable MGCG,
 * MGLS and CGTS, following the numbered hardware programming sequence;
 * the whole update runs under RLC safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data =
RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

/*
 * gfx_v8_0_update_coarse_grain_clock_gating - enable/disable CGCG and
 * CGLS under RLC safe mode, following the numbered hardware sequence.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}

/*
 * gfx_v8_0_update_gfx_clock_gating - order MGCG vs CGCG updates:
 * coarse grain is enabled last and disabled first.
 */
static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
					    bool enable)
{
	if (enable) {
		/* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
		 * === MGCG + MGLS + TS(CG/LS) ===
		 */
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
	} else {
		/* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
		 * === CGCG + CGLS ===
		 */
		gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
		gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
	}
	return 0;
}

/*
 * gfx_v8_0_tonga_update_gfx_clock_gating - route Tonga CG/LS state to
 * the SMU via PP_CG_MSG_ID messages (GFX_CG and GFX_MG blocks).
 */
static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
						  enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state;
	void *pp_handle = adev->powerplay.pp_handle;

	if (state == AMD_CG_STATE_UNGATE)
		pp_state = 0;
	else
		pp_state = PP_STATE_CG | PP_STATE_LS;

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			      PP_BLOCK_GFX_CG,
			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			      pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			      PP_BLOCK_GFX_MG,
			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			      pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	return 0;
}

/*
 * gfx_v8_0_polaris_update_gfx_clock_gating - like the Tonga variant but
 * additionally covers the 3D, RLC and CP blocks.
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
						    enum amd_clockgating_state state)
{
	uint32_t msg_id, pp_state;
void *pp_handle = adev->powerplay.pp_handle;

	if (state == AMD_CG_STATE_UNGATE)
		pp_state = 0;
	else
		pp_state = PP_STATE_CG | PP_STATE_LS;

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			      PP_BLOCK_GFX_CG,
			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			      pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			      PP_BLOCK_GFX_3D,
			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			      pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			      PP_BLOCK_GFX_MG,
			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			      pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			      PP_BLOCK_GFX_RLC,
			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			      pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
			      PP_BLOCK_GFX_CP,
			      PP_STATE_SUPPORT_CG | PP_STATE_SUPPORT_LS,
			      pp_state);
	amd_set_clockgating_by_smu(pp_handle, msg_id);

	return 0;
}

/* IP-block set_clockgating_state callback: dispatch per ASIC family. */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE ?
						 true : false);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}

/* The read pointer lives in a write-back slot updated by the CP. */
static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back to post the register write */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}

/*
 * gfx_v8_0_ring_emit_hdp_flush - emit a WAIT_REG_MEM that triggers and
 * polls an HDP flush; the done bit depends on which micro engine (ME)
 * and pipe the ring belongs to.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6145 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6146 amdgpu_ring_write(ring, ref_and_mask); 6147 amdgpu_ring_write(ring, ref_and_mask); 6148 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6149 } 6150 6151 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 6152 { 6153 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6154 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6155 WRITE_DATA_DST_SEL(0) | 6156 WR_CONFIRM)); 6157 amdgpu_ring_write(ring, mmHDP_DEBUG0); 6158 amdgpu_ring_write(ring, 0); 6159 amdgpu_ring_write(ring, 1); 6160 6161 } 6162 6163 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6164 struct amdgpu_ib *ib, 6165 unsigned vm_id, bool ctx_switch) 6166 { 6167 u32 header, control = 0; 6168 6169 if (ib->flags & AMDGPU_IB_FLAG_CE) 6170 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6171 else 6172 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6173 6174 control |= ib->length_dw | (vm_id << 24); 6175 6176 amdgpu_ring_write(ring, header); 6177 amdgpu_ring_write(ring, 6178 #ifdef __BIG_ENDIAN 6179 (2 << 0) | 6180 #endif 6181 (ib->gpu_addr & 0xFFFFFFFC)); 6182 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6183 amdgpu_ring_write(ring, control); 6184 } 6185 6186 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6187 struct amdgpu_ib *ib, 6188 unsigned vm_id, bool ctx_switch) 6189 { 6190 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); 6191 6192 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6193 amdgpu_ring_write(ring, 6194 #ifdef __BIG_ENDIAN 6195 (2 << 0) | 6196 #endif 6197 (ib->gpu_addr & 0xFFFFFFFC)); 6198 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6199 amdgpu_ring_write(ring, control); 6200 } 6201 6202 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6203 u64 seq, unsigned flags) 6204 { 6205 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6206 bool int_sel = flags & 
AMDGPU_FENCE_FLAG_INT; 6207 6208 /* EVENT_WRITE_EOP - flush caches, send int */ 6209 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6210 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6211 EOP_TC_ACTION_EN | 6212 EOP_TC_WB_ACTION_EN | 6213 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6214 EVENT_INDEX(5))); 6215 amdgpu_ring_write(ring, addr & 0xfffffffc); 6216 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6217 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6218 amdgpu_ring_write(ring, lower_32_bits(seq)); 6219 amdgpu_ring_write(ring, upper_32_bits(seq)); 6220 6221 } 6222 6223 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6224 { 6225 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); 6226 uint32_t seq = ring->fence_drv.sync_seq; 6227 uint64_t addr = ring->fence_drv.gpu_addr; 6228 6229 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6230 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 6231 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 6232 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 6233 amdgpu_ring_write(ring, addr & 0xfffffffc); 6234 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 6235 amdgpu_ring_write(ring, seq); 6236 amdgpu_ring_write(ring, 0xffffffff); 6237 amdgpu_ring_write(ring, 4); /* poll interval */ 6238 } 6239 6240 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6241 unsigned vm_id, uint64_t pd_addr) 6242 { 6243 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); 6244 6245 /* GFX8 emits 128 dw nop to prevent DE do vm_flush before CE finish CEIB */ 6246 if (usepfp) 6247 amdgpu_ring_insert_nop(ring, 128); 6248 6249 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6250 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 6251 WRITE_DATA_DST_SEL(0)) | 6252 WR_CONFIRM); 6253 if (vm_id < 8) { 6254 amdgpu_ring_write(ring, 6255 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); 6256 } else { 6257 amdgpu_ring_write(ring, 6258 
(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); 6259 } 6260 amdgpu_ring_write(ring, 0); 6261 amdgpu_ring_write(ring, pd_addr >> 12); 6262 6263 /* bits 0-15 are the VM contexts0-15 */ 6264 /* invalidate the cache */ 6265 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6266 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6267 WRITE_DATA_DST_SEL(0))); 6268 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6269 amdgpu_ring_write(ring, 0); 6270 amdgpu_ring_write(ring, 1 << vm_id); 6271 6272 /* wait for the invalidate to complete */ 6273 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6274 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 6275 WAIT_REG_MEM_FUNCTION(0) | /* always */ 6276 WAIT_REG_MEM_ENGINE(0))); /* me */ 6277 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6278 amdgpu_ring_write(ring, 0); 6279 amdgpu_ring_write(ring, 0); /* ref */ 6280 amdgpu_ring_write(ring, 0); /* mask */ 6281 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6282 6283 /* compute doesn't have PFP */ 6284 if (usepfp) { 6285 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 6286 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6287 amdgpu_ring_write(ring, 0x0); 6288 /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */ 6289 amdgpu_ring_insert_nop(ring, 128); 6290 } 6291 } 6292 6293 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 6294 { 6295 return ring->adev->wb.wb[ring->wptr_offs]; 6296 } 6297 6298 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 6299 { 6300 struct amdgpu_device *adev = ring->adev; 6301 6302 /* XXX check if swapping is necessary on BE */ 6303 adev->wb.wb[ring->wptr_offs] = ring->wptr; 6304 WDOORBELL32(ring->doorbell_index, ring->wptr); 6305 } 6306 6307 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 6308 u64 addr, u64 seq, 6309 unsigned flags) 6310 { 6311 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6312 bool 
int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6313 6314 /* RELEASE_MEM - flush caches, send int */ 6315 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 6316 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6317 EOP_TC_ACTION_EN | 6318 EOP_TC_WB_ACTION_EN | 6319 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6320 EVENT_INDEX(5))); 6321 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6322 amdgpu_ring_write(ring, addr & 0xfffffffc); 6323 amdgpu_ring_write(ring, upper_32_bits(addr)); 6324 amdgpu_ring_write(ring, lower_32_bits(seq)); 6325 amdgpu_ring_write(ring, upper_32_bits(seq)); 6326 } 6327 6328 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) 6329 { 6330 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 6331 amdgpu_ring_write(ring, 0); 6332 } 6333 6334 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 6335 { 6336 uint32_t dw2 = 0; 6337 6338 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6339 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6340 /* set load_global_config & load_global_uconfig */ 6341 dw2 |= 0x8001; 6342 /* set load_cs_sh_regs */ 6343 dw2 |= 0x01000000; 6344 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6345 dw2 |= 0x10002; 6346 6347 /* set load_ce_ram if preamble presented */ 6348 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 6349 dw2 |= 0x10000000; 6350 } else { 6351 /* still load_ce_ram if this is the first time preamble presented 6352 * although there is no context switch happens. 
6353 */ 6354 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 6355 dw2 |= 0x10000000; 6356 } 6357 6358 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6359 amdgpu_ring_write(ring, dw2); 6360 amdgpu_ring_write(ring, 0); 6361 } 6362 6363 static unsigned gfx_v8_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring) 6364 { 6365 return 6366 4; /* gfx_v8_0_ring_emit_ib_gfx */ 6367 } 6368 6369 static unsigned gfx_v8_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring) 6370 { 6371 return 6372 20 + /* gfx_v8_0_ring_emit_gds_switch */ 6373 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 6374 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ 6375 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */ 6376 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 6377 256 + 19 + /* gfx_v8_0_ring_emit_vm_flush */ 6378 2 + /* gfx_v8_ring_emit_sb */ 6379 3; /* gfx_v8_ring_emit_cntxcntl */ 6380 } 6381 6382 static unsigned gfx_v8_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring) 6383 { 6384 return 6385 4; /* gfx_v8_0_ring_emit_ib_compute */ 6386 } 6387 6388 static unsigned gfx_v8_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring) 6389 { 6390 return 6391 20 + /* gfx_v8_0_ring_emit_gds_switch */ 6392 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 6393 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ 6394 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 6395 17 + /* gfx_v8_0_ring_emit_vm_flush */ 6396 7 + 7 + 7; /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 6397 } 6398 6399 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6400 enum amdgpu_interrupt_state state) 6401 { 6402 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE, 6403 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6404 } 6405 6406 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6407 int me, int pipe, 6408 enum amdgpu_interrupt_state state) 6409 { 6410 /* 6411 * amdgpu controls only pipe 0 of MEC1. 
That's why this function only 6412 * handles the setting of interrupts for this specific pipe. All other 6413 * pipes' interrupts are set by amdkfd. 6414 */ 6415 6416 if (me == 1) { 6417 switch (pipe) { 6418 case 0: 6419 break; 6420 default: 6421 DRM_DEBUG("invalid pipe %d\n", pipe); 6422 return; 6423 } 6424 } else { 6425 DRM_DEBUG("invalid me %d\n", me); 6426 return; 6427 } 6428 6429 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 6430 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6431 } 6432 6433 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6434 struct amdgpu_irq_src *source, 6435 unsigned type, 6436 enum amdgpu_interrupt_state state) 6437 { 6438 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, 6439 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6440 6441 return 0; 6442 } 6443 6444 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6445 struct amdgpu_irq_src *source, 6446 unsigned type, 6447 enum amdgpu_interrupt_state state) 6448 { 6449 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, 6450 state == AMDGPU_IRQ_STATE_DISABLE ? 
0 : 1); 6451 6452 return 0; 6453 } 6454 6455 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6456 struct amdgpu_irq_src *src, 6457 unsigned type, 6458 enum amdgpu_interrupt_state state) 6459 { 6460 switch (type) { 6461 case AMDGPU_CP_IRQ_GFX_EOP: 6462 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 6463 break; 6464 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6465 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6466 break; 6467 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6468 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6469 break; 6470 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6471 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6472 break; 6473 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6474 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6475 break; 6476 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6477 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6478 break; 6479 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6480 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6481 break; 6482 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6483 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6484 break; 6485 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6486 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6487 break; 6488 default: 6489 break; 6490 } 6491 return 0; 6492 } 6493 6494 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6495 struct amdgpu_irq_src *source, 6496 struct amdgpu_iv_entry *entry) 6497 { 6498 int i; 6499 u8 me_id, pipe_id, queue_id; 6500 struct amdgpu_ring *ring; 6501 6502 DRM_DEBUG("IH: CP EOP\n"); 6503 me_id = (entry->ring_id & 0x0c) >> 2; 6504 pipe_id = (entry->ring_id & 0x03) >> 0; 6505 queue_id = (entry->ring_id & 0x70) >> 4; 6506 6507 switch (me_id) { 6508 case 0: 6509 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6510 break; 6511 case 1: 6512 case 2: 6513 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6514 ring = 
&adev->gfx.compute_ring[i]; 6515 /* Per-queue interrupt is supported for MEC starting from VI. 6516 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6517 */ 6518 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6519 amdgpu_fence_process(ring); 6520 } 6521 break; 6522 } 6523 return 0; 6524 } 6525 6526 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 6527 struct amdgpu_irq_src *source, 6528 struct amdgpu_iv_entry *entry) 6529 { 6530 DRM_ERROR("Illegal register access in command stream\n"); 6531 schedule_work(&adev->reset_work); 6532 return 0; 6533 } 6534 6535 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, 6536 struct amdgpu_irq_src *source, 6537 struct amdgpu_iv_entry *entry) 6538 { 6539 DRM_ERROR("Illegal instruction in command stream\n"); 6540 schedule_work(&adev->reset_work); 6541 return 0; 6542 } 6543 6544 const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 6545 .name = "gfx_v8_0", 6546 .early_init = gfx_v8_0_early_init, 6547 .late_init = gfx_v8_0_late_init, 6548 .sw_init = gfx_v8_0_sw_init, 6549 .sw_fini = gfx_v8_0_sw_fini, 6550 .hw_init = gfx_v8_0_hw_init, 6551 .hw_fini = gfx_v8_0_hw_fini, 6552 .suspend = gfx_v8_0_suspend, 6553 .resume = gfx_v8_0_resume, 6554 .is_idle = gfx_v8_0_is_idle, 6555 .wait_for_idle = gfx_v8_0_wait_for_idle, 6556 .check_soft_reset = gfx_v8_0_check_soft_reset, 6557 .pre_soft_reset = gfx_v8_0_pre_soft_reset, 6558 .soft_reset = gfx_v8_0_soft_reset, 6559 .post_soft_reset = gfx_v8_0_post_soft_reset, 6560 .set_clockgating_state = gfx_v8_0_set_clockgating_state, 6561 .set_powergating_state = gfx_v8_0_set_powergating_state, 6562 }; 6563 6564 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 6565 .get_rptr = gfx_v8_0_ring_get_rptr, 6566 .get_wptr = gfx_v8_0_ring_get_wptr_gfx, 6567 .set_wptr = gfx_v8_0_ring_set_wptr_gfx, 6568 .parse_cs = NULL, 6569 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 6570 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 6571 
.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 6572 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 6573 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 6574 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 6575 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 6576 .test_ring = gfx_v8_0_ring_test_ring, 6577 .test_ib = gfx_v8_0_ring_test_ib, 6578 .insert_nop = amdgpu_ring_insert_nop, 6579 .pad_ib = amdgpu_ring_generic_pad_ib, 6580 .emit_switch_buffer = gfx_v8_ring_emit_sb, 6581 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, 6582 .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_gfx, 6583 .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_gfx, 6584 }; 6585 6586 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 6587 .get_rptr = gfx_v8_0_ring_get_rptr, 6588 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 6589 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 6590 .parse_cs = NULL, 6591 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 6592 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 6593 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 6594 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 6595 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 6596 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 6597 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 6598 .test_ring = gfx_v8_0_ring_test_ring, 6599 .test_ib = gfx_v8_0_ring_test_ib, 6600 .insert_nop = amdgpu_ring_insert_nop, 6601 .pad_ib = amdgpu_ring_generic_pad_ib, 6602 .get_emit_ib_size = gfx_v8_0_ring_get_emit_ib_size_compute, 6603 .get_dma_frame_size = gfx_v8_0_ring_get_dma_frame_size_compute, 6604 }; 6605 6606 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 6607 { 6608 int i; 6609 6610 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6611 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 6612 6613 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6614 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 6615 } 6616 6617 static const struct 
amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 6618 .set = gfx_v8_0_set_eop_interrupt_state, 6619 .process = gfx_v8_0_eop_irq, 6620 }; 6621 6622 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 6623 .set = gfx_v8_0_set_priv_reg_fault_state, 6624 .process = gfx_v8_0_priv_reg_irq, 6625 }; 6626 6627 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 6628 .set = gfx_v8_0_set_priv_inst_fault_state, 6629 .process = gfx_v8_0_priv_inst_irq, 6630 }; 6631 6632 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 6633 { 6634 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 6635 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 6636 6637 adev->gfx.priv_reg_irq.num_types = 1; 6638 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 6639 6640 adev->gfx.priv_inst_irq.num_types = 1; 6641 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 6642 } 6643 6644 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 6645 { 6646 switch (adev->asic_type) { 6647 case CHIP_TOPAZ: 6648 adev->gfx.rlc.funcs = &iceland_rlc_funcs; 6649 break; 6650 case CHIP_STONEY: 6651 case CHIP_CARRIZO: 6652 adev->gfx.rlc.funcs = &cz_rlc_funcs; 6653 break; 6654 default: 6655 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs; 6656 break; 6657 } 6658 } 6659 6660 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 6661 { 6662 /* init asci gds info */ 6663 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 6664 adev->gds.gws.total_size = 64; 6665 adev->gds.oa.total_size = 16; 6666 6667 if (adev->gds.mem.total_size == 64 * 1024) { 6668 adev->gds.mem.gfx_partition_size = 4096; 6669 adev->gds.mem.cs_partition_size = 4096; 6670 6671 adev->gds.gws.gfx_partition_size = 4; 6672 adev->gds.gws.cs_partition_size = 4; 6673 6674 adev->gds.oa.gfx_partition_size = 4; 6675 adev->gds.oa.cs_partition_size = 1; 6676 } else { 6677 adev->gds.mem.gfx_partition_size = 1024; 6678 adev->gds.mem.cs_partition_size = 1024; 6679 6680 
adev->gds.gws.gfx_partition_size = 16; 6681 adev->gds.gws.cs_partition_size = 16; 6682 6683 adev->gds.oa.gfx_partition_size = 4; 6684 adev->gds.oa.cs_partition_size = 4; 6685 } 6686 } 6687 6688 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 6689 u32 bitmap) 6690 { 6691 u32 data; 6692 6693 if (!bitmap) 6694 return; 6695 6696 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 6697 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 6698 6699 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data); 6700 } 6701 6702 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) 6703 { 6704 u32 data, mask; 6705 6706 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | 6707 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 6708 6709 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh); 6710 6711 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; 6712 } 6713 6714 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) 6715 { 6716 int i, j, k, counter, active_cu_number = 0; 6717 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 6718 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 6719 unsigned disable_masks[4 * 2]; 6720 6721 memset(cu_info, 0, sizeof(*cu_info)); 6722 6723 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); 6724 6725 mutex_lock(&adev->grbm_idx_mutex); 6726 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 6727 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 6728 mask = 1; 6729 ao_bitmap = 0; 6730 counter = 0; 6731 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 6732 if (i < 4 && j < 2) 6733 gfx_v8_0_set_user_cu_inactive_bitmap( 6734 adev, disable_masks[i * 2 + j]); 6735 bitmap = gfx_v8_0_get_cu_active_bitmap(adev); 6736 cu_info->bitmap[i][j] = bitmap; 6737 6738 for (k = 0; k < 16; k ++) { 6739 if (bitmap & mask) { 6740 if (counter < 2) 6741 ao_bitmap |= mask; 6742 counter ++; 6743 } 6744 mask <<= 1; 6745 } 6746 active_cu_number += counter; 6747 ao_cu_mask |= (ao_bitmap << (i * 
16 + j * 8)); 6748 } 6749 } 6750 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 6751 mutex_unlock(&adev->grbm_idx_mutex); 6752 6753 cu_info->number = active_cu_number; 6754 cu_info->ao_cu_mask = ao_cu_mask; 6755 } 6756