1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/firmware.h> 24 #include "drmP.h" 25 #include "amdgpu.h" 26 #include "amdgpu_gfx.h" 27 #include "vi.h" 28 #include "vid.h" 29 #include "amdgpu_ucode.h" 30 #include "amdgpu_atombios.h" 31 #include "clearstate_vi.h" 32 33 #include "gmc/gmc_8_2_d.h" 34 #include "gmc/gmc_8_2_sh_mask.h" 35 36 #include "oss/oss_3_0_d.h" 37 #include "oss/oss_3_0_sh_mask.h" 38 39 #include "bif/bif_5_0_d.h" 40 #include "bif/bif_5_0_sh_mask.h" 41 42 #include "gca/gfx_8_0_d.h" 43 #include "gca/gfx_8_0_enum.h" 44 #include "gca/gfx_8_0_sh_mask.h" 45 #include "gca/gfx_8_0_enum.h" 46 47 #include "dce/dce_10_0_d.h" 48 #include "dce/dce_10_0_sh_mask.h" 49 50 #include "smu/smu_7_1_3_d.h" 51 52 #define GFX8_NUM_GFX_RINGS 1 53 #define GFX8_NUM_COMPUTE_RINGS 8 54 55 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 56 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 57 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 58 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 59 60 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) 61 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) 62 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) 63 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) 64 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT) 65 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT) 66 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT) 67 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT) 68 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT) 69 70 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L 71 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L 72 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L 73 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L 74 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L 75 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L 76 77 /* BPM SERDES CMD */ 78 #define SET_BPM_SERDES_CMD 1 79 #define CLE_BPM_SERDES_CMD 0 80 81 /* BPM Register Address*/ 82 enum { 83 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */ 84 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */ 85 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */ 86 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 87 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 88 BPM_REG_FGCG_MAX 89 }; 90 91 #define RLC_FormatDirectRegListLength 14 92 93 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); 94 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); 95 MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); 96 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin"); 97 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 98 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 99 100 MODULE_FIRMWARE("amdgpu/stoney_ce.bin"); 101 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin"); 102 MODULE_FIRMWARE("amdgpu/stoney_me.bin"); 103 MODULE_FIRMWARE("amdgpu/stoney_mec.bin"); 104 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin"); 105 106 MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 107 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 108 MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 109 MODULE_FIRMWARE("amdgpu/tonga_mec.bin"); 110 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin"); 111 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin"); 112 113 MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); 114 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); 115 MODULE_FIRMWARE("amdgpu/topaz_me.bin"); 116 MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); 117 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); 118 119 MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); 120 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin"); 121 MODULE_FIRMWARE("amdgpu/fiji_me.bin"); 122 MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 123 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 124 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 125 126 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); 127 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); 128 MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); 129 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); 130 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); 131 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); 132 133 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); 134 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); 135 MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); 136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); 137 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); 138 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); 139 140 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 141 { 142 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, 143 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1}, 144 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2}, 145 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3}, 146 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4}, 147 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5}, 148 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6}, 149 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7}, 150 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8}, 151 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9}, 152 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10}, 153 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11}, 154 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12}, 155 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13}, 156 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14}, 157 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15} 158 }; 159 160 static const u32 golden_settings_tonga_a11[] = 161 { 162 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, 163 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 164 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 165 mmGB_GPU_ID, 0x0000000f, 0x00000000, 166 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 167 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc, 168 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 169 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 170 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 171 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 172 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 173 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb, 174 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b, 175 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876, 176 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 177 }; 178 179 static const u32 tonga_golden_common_all[] = 180 { 181 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 182 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 183 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 184 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 185 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 186 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 187 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 188 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 189 }; 190 191 static const u32 tonga_mgcg_cgcg_init[] = 192 { 193 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 194 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 195 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 196 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 197 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 198 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 199 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 200 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 201 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 202 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 203 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 204 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 205 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 206 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 207 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 208 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 209 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 210 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 211 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 212 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 213 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 214 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 215 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 216 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 217 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 218 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 219 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 220 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 221 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 222 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 223 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 224 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 225 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 226 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 227 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 228 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 229 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 230 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 231 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 232 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 233 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 234 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 235 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 236 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 237 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 238 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 239 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 240 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 241 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 242 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 243 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 244 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 245 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 246 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 247 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 248 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 249 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 250 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 251 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 252 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 253 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 254 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 255 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 256 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 257 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 258 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 259 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 260 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 261 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 262 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 263 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 264 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 265 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 266 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 267 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 268 }; 269 270 static const u32 golden_settings_polaris11_a11[] = 271 { 272 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208, 273 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 274 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 275 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 276 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 277 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 278 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 279 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 280 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 281 mmSQ_CONFIG, 0x07f80000, 0x07180000, 282 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 283 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 284 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, 285 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 286 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, 287 }; 288 289 static const u32 polaris11_golden_common_all[] = 290 { 291 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 292 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, 293 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 294 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 295 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 296 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 297 }; 298 299 static const u32 golden_settings_polaris10_a11[] = 300 { 301 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, 302 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, 303 mmCB_HW_CONTROL_2, 0, 0x0f000000, 304 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 305 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 306 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 307 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 308 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 309 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, 310 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 311 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 312 mmSQ_CONFIG, 0x07f80000, 0x07180000, 313 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 314 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 315 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 316 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 317 }; 318 319 static const u32 polaris10_golden_common_all[] = 320 { 321 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 322 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 323 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 324 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 325 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 326 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 327 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 328 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 329 }; 330 331 static const u32 fiji_golden_common_all[] = 332 { 333 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 334 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a, 335 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e, 336 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 337 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 338 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 339 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 340 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 341 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 342 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, 343 }; 344 345 static const u32 golden_settings_fiji_a10[] = 346 { 347 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 348 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 349 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 350 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 351 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 352 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 353 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 354 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 355 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 356 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff, 357 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 358 }; 359 360 static const u32 fiji_mgcg_cgcg_init[] = 361 { 362 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 363 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 364 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 365 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 366 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 367 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 368 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 369 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 370 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 371 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 372 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 373 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 374 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 375 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 376 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 377 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 378 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 379 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 380 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 381 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 382 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 383 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 384 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 385 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 386 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 387 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 388 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 389 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 390 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 391 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 392 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 393 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 394 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 395 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 396 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 397 }; 398 399 static const u32 golden_settings_iceland_a11[] = 400 { 401 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 402 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 403 mmDB_DEBUG3, 0xc0000000, 0xc0000000, 404 mmGB_GPU_ID, 0x0000000f, 0x00000000, 405 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 406 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 407 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, 408 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 409 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 410 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 411 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 412 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 413 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1, 414 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 415 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010, 416 }; 417 418 static const u32 iceland_golden_common_all[] = 419 { 420 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 421 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 422 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 423 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 424 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 425 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 426 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 427 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 428 }; 429 430 static const u32 iceland_mgcg_cgcg_init[] = 431 { 432 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 433 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 434 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 435 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 436 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100, 437 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100, 438 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100, 439 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 440 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 441 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 442 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 443 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 444 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 445 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 446 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 447 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 448 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 449 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 450 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 451 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 452 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 453 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 454 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100, 455 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 456 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 457 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 458 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 459 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 460 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 461 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 462 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 463 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 464 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 465 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 466 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 467 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 468 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 469 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 470 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 471 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 472 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 473 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 474 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 475 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 476 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 477 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 478 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 479 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 480 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 481 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 482 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 483 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 484 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 485 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 486 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 487 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 488 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 489 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 490 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 491 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 492 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 493 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 494 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 495 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 496 }; 497 498 static const u32 cz_golden_settings_a11[] = 499 { 500 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 501 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 502 mmGB_GPU_ID, 0x0000000f, 0x00000000, 503 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, 504 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 505 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 506 mmTA_CNTL_AUX, 0x000f000f, 0x00010000, 507 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 508 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, 509 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 510 }; 511 512 static const u32 cz_golden_common_all[] = 513 { 514 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 515 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 516 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 517 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 518 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 519 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 520 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 521 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF 522 }; 523 524 static const u32 cz_mgcg_cgcg_init[] = 525 { 526 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 527 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 528 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 529 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 530 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 531 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 532 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100, 533 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 534 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 535 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 536 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 537 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 538 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 539 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 540 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 541 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 542 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 543 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 544 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 545 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 546 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 547 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 548 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 549 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 550 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 551 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 552 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 553 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 554 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 555 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 556 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 557 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 558 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 559 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 560 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 561 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 562 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 563 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 564 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 565 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 566 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 567 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 568 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 569 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 570 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 571 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 572 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 573 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 574 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 575 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 576 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 577 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 578 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 579 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 580 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 581 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 582 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 583 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 584 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 585 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 586 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 587 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 588 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 589 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 590 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 591 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 592 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 593 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 594 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 595 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 596 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 597 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 598 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 599 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 600 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 601 }; 602 603 static const u32 stoney_golden_settings_a11[] = 604 { 605 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 606 mmGB_GPU_ID, 0x0000000f, 0x00000000, 607 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 608 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 609 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 610 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 611 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 612 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 613 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, 614 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, 615 }; 616 617 static const u32 stoney_golden_common_all[] = 618 { 619 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 620 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000, 621 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 622 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, 623 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 624 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 625 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF, 626 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF, 627 }; 628 629 static const u32 stoney_mgcg_cgcg_init[] = 630 { 631 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 632 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 633 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 634 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 635 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200, 636 mmATC_MISC_CG, 0xffffffff, 0x000c0200, 637 }; 638 639 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); 640 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); 641 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); 642 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); 643 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); 644 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); 645 646 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) 647 { 648 switch (adev->asic_type) { 649 case CHIP_TOPAZ: 650 amdgpu_program_register_sequence(adev, 651 iceland_mgcg_cgcg_init, 652 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init)); 653 amdgpu_program_register_sequence(adev, 654 golden_settings_iceland_a11, 655 (const u32)ARRAY_SIZE(golden_settings_iceland_a11)); 656 amdgpu_program_register_sequence(adev, 657 iceland_golden_common_all, 658 (const u32)ARRAY_SIZE(iceland_golden_common_all)); 659 break; 660 case CHIP_FIJI: 661 amdgpu_program_register_sequence(adev, 662 fiji_mgcg_cgcg_init, 663 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init)); 664 amdgpu_program_register_sequence(adev, 665 golden_settings_fiji_a10, 666 (const u32)ARRAY_SIZE(golden_settings_fiji_a10)); 667 amdgpu_program_register_sequence(adev, 668 fiji_golden_common_all, 669 (const u32)ARRAY_SIZE(fiji_golden_common_all)); 670 break; 671 672 case CHIP_TONGA: 673 amdgpu_program_register_sequence(adev, 674 tonga_mgcg_cgcg_init, 675 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init)); 676 amdgpu_program_register_sequence(adev, 677 golden_settings_tonga_a11, 678 (const u32)ARRAY_SIZE(golden_settings_tonga_a11)); 679 amdgpu_program_register_sequence(adev, 680 tonga_golden_common_all, 681 (const u32)ARRAY_SIZE(tonga_golden_common_all)); 682 break; 683 case CHIP_POLARIS11: 684 amdgpu_program_register_sequence(adev, 685 golden_settings_polaris11_a11, 686 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11)); 687 amdgpu_program_register_sequence(adev, 688 polaris11_golden_common_all, 689 (const u32)ARRAY_SIZE(polaris11_golden_common_all)); 690 break; 691 case CHIP_POLARIS10: 692 amdgpu_program_register_sequence(adev, 693 golden_settings_polaris10_a11, 694 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11)); 695 amdgpu_program_register_sequence(adev, 696 polaris10_golden_common_all, 697 (const u32)ARRAY_SIZE(polaris10_golden_common_all)); 698 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); 699 break; 700 case CHIP_CARRIZO: 701 amdgpu_program_register_sequence(adev, 702 cz_mgcg_cgcg_init, 703 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init)); 704 amdgpu_program_register_sequence(adev, 705 cz_golden_settings_a11, 706 (const u32)ARRAY_SIZE(cz_golden_settings_a11)); 707 amdgpu_program_register_sequence(adev, 708 cz_golden_common_all, 709 (const u32)ARRAY_SIZE(cz_golden_common_all)); 710 break; 711 case CHIP_STONEY: 712 amdgpu_program_register_sequence(adev, 713 stoney_mgcg_cgcg_init, 714 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init)); 715 amdgpu_program_register_sequence(adev, 716 stoney_golden_settings_a11, 717 (const u32)ARRAY_SIZE(stoney_golden_settings_a11)); 718 amdgpu_program_register_sequence(adev, 719 stoney_golden_common_all, 720 (const u32)ARRAY_SIZE(stoney_golden_common_all)); 721 break; 722 default: 723 break; 724 } 725 } 726 727 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev) 728 { 729 int i; 730 731 adev->gfx.scratch.num_reg = 7; 732 adev->gfx.scratch.reg_base = mmSCRATCH_REG0; 733 for (i = 0; i < adev->gfx.scratch.num_reg; i++) { 734 adev->gfx.scratch.free[i] = true; 735 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i; 736 } 737 } 738 739 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) 740 { 741 struct amdgpu_device *adev = ring->adev; 742 uint32_t scratch; 743 uint32_t tmp = 0; 744 unsigned i; 745 int r; 746 747 r = amdgpu_gfx_scratch_get(adev, &scratch); 748 if (r) { 749 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); 750 return r; 751 } 752 WREG32(scratch, 0xCAFEDEAD); 753 r = amdgpu_ring_alloc(ring, 3); 754 if (r) { 755 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 756 ring->idx, r); 757 amdgpu_gfx_scratch_free(adev, scratch); 758 return r; 759 } 760 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 761 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 762 amdgpu_ring_write(ring, 0xDEADBEEF); 763 amdgpu_ring_commit(ring); 764 765 for (i = 0; i < adev->usec_timeout; i++) { 766 tmp = RREG32(scratch); 767 if (tmp == 0xDEADBEEF) 768 break; 769 DRM_UDELAY(1); 770 } 771 if (i < adev->usec_timeout) { 772 DRM_INFO("ring test on %d succeeded in %d usecs\n", 773 ring->idx, i); 774 } else { 775 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 776 ring->idx, scratch, tmp); 777 r = -EINVAL; 778 } 779 amdgpu_gfx_scratch_free(adev, scratch); 780 return r; 781 } 782 783 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) 784 { 785 struct amdgpu_device *adev = ring->adev; 786 struct amdgpu_ib ib; 787 struct fence *f = NULL; 788 uint32_t scratch; 789 uint32_t tmp = 0; 790 unsigned i; 791 int r; 792 793 r = amdgpu_gfx_scratch_get(adev, &scratch); 794 if (r) { 795 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r); 796 return r; 797 } 798 WREG32(scratch, 0xCAFEDEAD); 799 memset(&ib, 0, sizeof(ib)); 800 r = amdgpu_ib_get(adev, NULL, 256, &ib); 801 if (r) { 802 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 803 goto err1; 804 } 805 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 806 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); 807 ib.ptr[2] = 0xDEADBEEF; 808 ib.length_dw = 3; 809 810 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); 811 if (r) 812 goto err2; 813 814 r = fence_wait(f, false); 815 if (r) { 816 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 817 goto err2; 818 } 819 for (i = 0; i < adev->usec_timeout; i++) { 820 tmp = RREG32(scratch); 821 if (tmp == 0xDEADBEEF) 822 break; 823 DRM_UDELAY(1); 824 } 825 if (i < adev->usec_timeout) { 826 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", 827 ring->idx, i); 828 goto err2; 829 } else { 830 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", 831 scratch, tmp); 832 r = -EINVAL; 833 } 834 err2: 835 fence_put(f); 836 amdgpu_ib_free(adev, &ib, NULL); 837 fence_put(f); 838 err1: 839 amdgpu_gfx_scratch_free(adev, scratch); 840 return r; 841 } 842 843 844 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { 845 release_firmware(adev->gfx.pfp_fw); 846 adev->gfx.pfp_fw = NULL; 847 release_firmware(adev->gfx.me_fw); 848 adev->gfx.me_fw = NULL; 849 release_firmware(adev->gfx.ce_fw); 850 adev->gfx.ce_fw = NULL; 851 release_firmware(adev->gfx.rlc_fw); 852 adev->gfx.rlc_fw = NULL; 853 release_firmware(adev->gfx.mec_fw); 854 adev->gfx.mec_fw = NULL; 855 if ((adev->asic_type != CHIP_STONEY) && 856 (adev->asic_type != CHIP_TOPAZ)) 857 release_firmware(adev->gfx.mec2_fw); 858 adev->gfx.mec2_fw = NULL; 859 860 kfree(adev->gfx.rlc.register_list_format); 861 } 862 863 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 864 { 865 const char *chip_name; 866 char fw_name[30]; 867 int err; 868 struct amdgpu_firmware_info *info = NULL; 869 const struct common_firmware_header *header = NULL; 870 const struct gfx_firmware_header_v1_0 *cp_hdr; 871 const struct rlc_firmware_header_v2_0 *rlc_hdr; 872 unsigned int *tmp = NULL, i; 873 874 DRM_DEBUG("\n"); 875 876 switch (adev->asic_type) { 877 case CHIP_TOPAZ: 878 chip_name = "topaz"; 879 break; 880 case CHIP_TONGA: 881 chip_name = "tonga"; 882 break; 883 case CHIP_CARRIZO: 884 chip_name = "carrizo"; 885 break; 886 case CHIP_FIJI: 887 chip_name = "fiji"; 888 break; 889 case CHIP_POLARIS11: 890 chip_name = "polaris11"; 891 break; 892 case CHIP_POLARIS10: 893 chip_name = "polaris10"; 894 break; 895 case CHIP_STONEY: 896 chip_name = "stoney"; 897 break; 898 default: 899 BUG(); 900 } 901 902 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 903 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 904 if (err) 905 goto out; 906 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 907 if (err) 908 goto out; 909 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 910 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 911 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 912 913 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 914 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 915 if (err) 916 goto out; 917 err = amdgpu_ucode_validate(adev->gfx.me_fw); 918 if (err) 919 goto out; 920 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 921 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 922 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 923 924 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 925 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 926 if (err) 927 goto out; 928 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 929 if (err) 930 goto out; 931 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 932 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 933 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 934 935 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 936 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 937 if (err) 938 goto out; 939 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 940 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 941 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 942 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 943 944 adev->gfx.rlc.save_and_restore_offset = 945 le32_to_cpu(rlc_hdr->save_and_restore_offset); 946 adev->gfx.rlc.clear_state_descriptor_offset = 947 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 948 adev->gfx.rlc.avail_scratch_ram_locations = 949 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 950 adev->gfx.rlc.reg_restore_list_size = 951 le32_to_cpu(rlc_hdr->reg_restore_list_size); 952 adev->gfx.rlc.reg_list_format_start = 953 le32_to_cpu(rlc_hdr->reg_list_format_start); 954 adev->gfx.rlc.reg_list_format_separate_start = 955 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 956 adev->gfx.rlc.starting_offsets_start = 957 le32_to_cpu(rlc_hdr->starting_offsets_start); 958 adev->gfx.rlc.reg_list_format_size_bytes = 959 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 960 adev->gfx.rlc.reg_list_size_bytes = 961 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 962 963 adev->gfx.rlc.register_list_format = 964 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 965 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 966 967 if (!adev->gfx.rlc.register_list_format) { 968 err = -ENOMEM; 969 goto out; 970 } 971 972 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 973 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 974 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) 975 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 976 977 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 978 979 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 980 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 981 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 982 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 983 984 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 985 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 986 if (err) 987 goto out; 988 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 989 if (err) 990 goto out; 991 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 992 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 993 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 994 995 if ((adev->asic_type != CHIP_STONEY) && 996 (adev->asic_type != CHIP_TOPAZ)) { 997 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 998 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 999 if (!err) { 1000 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1001 if (err) 1002 goto out; 1003 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1004 adev->gfx.mec2_fw->data; 1005 adev->gfx.mec2_fw_version = 1006 le32_to_cpu(cp_hdr->header.ucode_version); 1007 adev->gfx.mec2_feature_version = 1008 le32_to_cpu(cp_hdr->ucode_feature_version); 1009 } else { 1010 err = 0; 1011 adev->gfx.mec2_fw = NULL; 1012 } 1013 } 1014 1015 if (adev->firmware.smu_load) { 1016 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1017 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1018 info->fw = adev->gfx.pfp_fw; 1019 header = (const struct common_firmware_header *)info->fw->data; 1020 adev->firmware.fw_size += 1021 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1022 1023 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1024 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1025 info->fw = adev->gfx.me_fw; 1026 header = (const struct common_firmware_header *)info->fw->data; 1027 adev->firmware.fw_size += 1028 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1029 1030 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1031 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1032 info->fw = adev->gfx.ce_fw; 1033 header = (const struct common_firmware_header *)info->fw->data; 1034 adev->firmware.fw_size += 1035 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1036 1037 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1038 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1039 info->fw = adev->gfx.rlc_fw; 1040 header = (const struct common_firmware_header *)info->fw->data; 1041 adev->firmware.fw_size += 1042 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1043 1044 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1045 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1046 info->fw = adev->gfx.mec_fw; 1047 header = (const struct common_firmware_header *)info->fw->data; 1048 adev->firmware.fw_size += 1049 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1050 1051 if (adev->gfx.mec2_fw) { 1052 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1053 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1054 info->fw = adev->gfx.mec2_fw; 1055 header = (const struct common_firmware_header *)info->fw->data; 1056 adev->firmware.fw_size += 1057 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1058 } 1059 1060 } 1061 1062 out: 1063 if (err) { 1064 dev_err(adev->dev, 1065 "gfx8: Failed to load firmware \"%s\"\n", 1066 fw_name); 1067 release_firmware(adev->gfx.pfp_fw); 1068 adev->gfx.pfp_fw = NULL; 1069 release_firmware(adev->gfx.me_fw); 1070 adev->gfx.me_fw = NULL; 1071 release_firmware(adev->gfx.ce_fw); 1072 adev->gfx.ce_fw = NULL; 1073 release_firmware(adev->gfx.rlc_fw); 1074 adev->gfx.rlc_fw = NULL; 1075 release_firmware(adev->gfx.mec_fw); 1076 adev->gfx.mec_fw = NULL; 1077 release_firmware(adev->gfx.mec2_fw); 1078 adev->gfx.mec2_fw = NULL; 1079 } 1080 return err; 1081 } 1082 1083 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, 1084 volatile u32 *buffer) 1085 { 1086 u32 count = 0, i; 1087 const struct cs_section_def *sect = NULL; 1088 const struct cs_extent_def *ext = NULL; 1089 1090 if (adev->gfx.rlc.cs_data == NULL) 1091 return; 1092 if (buffer == NULL) 1093 return; 1094 1095 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1096 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1097 1098 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1099 buffer[count++] = cpu_to_le32(0x80000000); 1100 buffer[count++] = cpu_to_le32(0x80000000); 1101 1102 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1103 for (ext = sect->section; ext->extent != NULL; ++ext) { 1104 if (sect->id == SECT_CONTEXT) { 1105 buffer[count++] = 1106 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1107 buffer[count++] = cpu_to_le32(ext->reg_index - 1108 PACKET3_SET_CONTEXT_REG_START); 1109 for (i = 0; i < ext->reg_count; i++) 1110 buffer[count++] = cpu_to_le32(ext->extent[i]); 1111 } else { 1112 return; 1113 } 1114 } 1115 } 1116 1117 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 1118 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - 1119 PACKET3_SET_CONTEXT_REG_START); 1120 switch (adev->asic_type) { 1121 case CHIP_TONGA: 1122 case CHIP_POLARIS10: 1123 buffer[count++] = cpu_to_le32(0x16000012); 1124 buffer[count++] = cpu_to_le32(0x0000002A); 1125 break; 1126 case CHIP_POLARIS11: 1127 buffer[count++] = cpu_to_le32(0x16000012); 1128 buffer[count++] = cpu_to_le32(0x00000000); 1129 break; 1130 case CHIP_FIJI: 1131 buffer[count++] = cpu_to_le32(0x3a00161a); 1132 buffer[count++] = cpu_to_le32(0x0000002e); 1133 break; 1134 case CHIP_TOPAZ: 1135 case CHIP_CARRIZO: 1136 buffer[count++] = cpu_to_le32(0x00000002); 1137 buffer[count++] = cpu_to_le32(0x00000000); 1138 break; 1139 case CHIP_STONEY: 1140 buffer[count++] = cpu_to_le32(0x00000000); 1141 buffer[count++] = cpu_to_le32(0x00000000); 1142 break; 1143 default: 1144 buffer[count++] = cpu_to_le32(0x00000000); 1145 buffer[count++] = cpu_to_le32(0x00000000); 1146 break; 1147 } 1148 1149 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1150 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1151 1152 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1153 buffer[count++] = cpu_to_le32(0); 1154 } 1155 1156 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) 1157 { 1158 int r; 1159 1160 /* clear state block */ 1161 if (adev->gfx.rlc.clear_state_obj) { 1162 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1163 if (unlikely(r != 0)) 1164 dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r); 1165 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); 1166 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1167 1168 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); 1169 adev->gfx.rlc.clear_state_obj = NULL; 1170 } 1171 } 1172 1173 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) 1174 { 1175 volatile u32 *dst_ptr; 1176 u32 dws; 1177 const struct cs_section_def *cs_data; 1178 int r; 1179 1180 adev->gfx.rlc.cs_data = vi_cs_data; 1181 1182 cs_data = adev->gfx.rlc.cs_data; 1183 1184 if (cs_data) { 1185 /* clear state block */ 1186 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); 1187 1188 if (adev->gfx.rlc.clear_state_obj == NULL) { 1189 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true, 1190 AMDGPU_GEM_DOMAIN_VRAM, 1191 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, 1192 NULL, NULL, 1193 &adev->gfx.rlc.clear_state_obj); 1194 if (r) { 1195 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); 1196 gfx_v8_0_rlc_fini(adev); 1197 return r; 1198 } 1199 } 1200 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); 1201 if (unlikely(r != 0)) { 1202 gfx_v8_0_rlc_fini(adev); 1203 return r; 1204 } 1205 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM, 1206 &adev->gfx.rlc.clear_state_gpu_addr); 1207 if (r) { 1208 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1209 dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r); 1210 gfx_v8_0_rlc_fini(adev); 1211 return r; 1212 } 1213 1214 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr); 1215 if (r) { 1216 dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r); 1217 gfx_v8_0_rlc_fini(adev); 1218 return r; 1219 } 1220 /* set up the cs buffer */ 1221 dst_ptr = adev->gfx.rlc.cs_ptr; 1222 gfx_v8_0_get_csb_buffer(adev, dst_ptr); 1223 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); 1224 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1225 } 1226 1227 return 0; 1228 } 1229 1230 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) 1231 { 1232 int r; 1233 1234 if (adev->gfx.mec.hpd_eop_obj) { 1235 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 1236 if (unlikely(r != 0)) 1237 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r); 1238 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj); 1239 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1240 1241 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj); 1242 adev->gfx.mec.hpd_eop_obj = NULL; 1243 } 1244 } 1245 1246 #define MEC_HPD_SIZE 2048 1247 1248 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) 1249 { 1250 int r; 1251 u32 *hpd; 1252 1253 /* 1254 * we assign only 1 pipe because all other pipes will 1255 * be handled by KFD 1256 */ 1257 adev->gfx.mec.num_mec = 1; 1258 adev->gfx.mec.num_pipe = 1; 1259 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; 1260 1261 if (adev->gfx.mec.hpd_eop_obj == NULL) { 1262 r = amdgpu_bo_create(adev, 1263 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, 1264 PAGE_SIZE, true, 1265 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, 1266 &adev->gfx.mec.hpd_eop_obj); 1267 if (r) { 1268 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1269 return r; 1270 } 1271 } 1272 1273 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false); 1274 if (unlikely(r != 0)) { 1275 gfx_v8_0_mec_fini(adev); 1276 return r; 1277 } 1278 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT, 1279 &adev->gfx.mec.hpd_eop_gpu_addr); 1280 if (r) { 1281 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r); 1282 gfx_v8_0_mec_fini(adev); 1283 return r; 1284 } 1285 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd); 1286 if (r) { 1287 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r); 1288 gfx_v8_0_mec_fini(adev); 1289 return r; 1290 } 1291 1292 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); 1293 1294 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1295 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1296 1297 return 0; 1298 } 1299 1300 static const u32 vgpr_init_compute_shader[] = 1301 { 1302 0x7e000209, 0x7e020208, 1303 0x7e040207, 0x7e060206, 1304 0x7e080205, 0x7e0a0204, 1305 0x7e0c0203, 0x7e0e0202, 1306 0x7e100201, 0x7e120200, 1307 0x7e140209, 0x7e160208, 1308 0x7e180207, 0x7e1a0206, 1309 0x7e1c0205, 0x7e1e0204, 1310 0x7e200203, 0x7e220202, 1311 0x7e240201, 0x7e260200, 1312 0x7e280209, 0x7e2a0208, 1313 0x7e2c0207, 0x7e2e0206, 1314 0x7e300205, 0x7e320204, 1315 0x7e340203, 0x7e360202, 1316 0x7e380201, 0x7e3a0200, 1317 0x7e3c0209, 0x7e3e0208, 1318 0x7e400207, 0x7e420206, 1319 0x7e440205, 0x7e460204, 1320 0x7e480203, 0x7e4a0202, 1321 0x7e4c0201, 0x7e4e0200, 1322 0x7e500209, 0x7e520208, 1323 0x7e540207, 0x7e560206, 1324 0x7e580205, 0x7e5a0204, 1325 0x7e5c0203, 0x7e5e0202, 1326 0x7e600201, 0x7e620200, 1327 0x7e640209, 0x7e660208, 1328 0x7e680207, 0x7e6a0206, 1329 0x7e6c0205, 0x7e6e0204, 1330 0x7e700203, 0x7e720202, 1331 0x7e740201, 0x7e760200, 1332 0x7e780209, 0x7e7a0208, 1333 0x7e7c0207, 0x7e7e0206, 1334 0xbf8a0000, 0xbf810000, 1335 }; 1336 1337 static const u32 sgpr_init_compute_shader[] = 1338 { 1339 0xbe8a0100, 0xbe8c0102, 1340 0xbe8e0104, 0xbe900106, 1341 0xbe920108, 0xbe940100, 1342 0xbe960102, 0xbe980104, 1343 0xbe9a0106, 0xbe9c0108, 1344 0xbe9e0100, 0xbea00102, 1345 0xbea20104, 0xbea40106, 1346 0xbea60108, 0xbea80100, 1347 0xbeaa0102, 0xbeac0104, 1348 0xbeae0106, 0xbeb00108, 1349 0xbeb20100, 0xbeb40102, 1350 0xbeb60104, 0xbeb80106, 1351 0xbeba0108, 0xbebc0100, 1352 0xbebe0102, 0xbec00104, 1353 0xbec20106, 0xbec40108, 1354 0xbec60100, 0xbec80102, 1355 0xbee60004, 0xbee70005, 1356 0xbeea0006, 0xbeeb0007, 1357 0xbee80008, 0xbee90009, 1358 0xbefc0000, 0xbf8a0000, 1359 0xbf810000, 0x00000000, 1360 }; 1361 1362 static const u32 vgpr_init_regs[] = 1363 { 1364 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, 1365 mmCOMPUTE_RESOURCE_LIMITS, 0, 1366 mmCOMPUTE_NUM_THREAD_X, 256*4, 1367 mmCOMPUTE_NUM_THREAD_Y, 1, 1368 mmCOMPUTE_NUM_THREAD_Z, 1, 1369 mmCOMPUTE_PGM_RSRC2, 20, 1370 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1371 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1372 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1373 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1374 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1375 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1376 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1377 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1378 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1379 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1380 }; 1381 1382 static const u32 sgpr1_init_regs[] = 1383 { 1384 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f, 1385 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1386 mmCOMPUTE_NUM_THREAD_X, 256*5, 1387 mmCOMPUTE_NUM_THREAD_Y, 1, 1388 mmCOMPUTE_NUM_THREAD_Z, 1, 1389 mmCOMPUTE_PGM_RSRC2, 20, 1390 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1391 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1392 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1393 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1394 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1395 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1396 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1397 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1398 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1399 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1400 }; 1401 1402 static const u32 sgpr2_init_regs[] = 1403 { 1404 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0, 1405 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1406 mmCOMPUTE_NUM_THREAD_X, 256*5, 1407 mmCOMPUTE_NUM_THREAD_Y, 1, 1408 mmCOMPUTE_NUM_THREAD_Z, 1, 1409 mmCOMPUTE_PGM_RSRC2, 20, 1410 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1411 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1412 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1413 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1414 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1415 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1416 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1417 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1418 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1419 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1420 }; 1421 1422 static const u32 sec_ded_counter_registers[] = 1423 { 1424 mmCPC_EDC_ATC_CNT, 1425 mmCPC_EDC_SCRATCH_CNT, 1426 mmCPC_EDC_UCODE_CNT, 1427 mmCPF_EDC_ATC_CNT, 1428 mmCPF_EDC_ROQ_CNT, 1429 mmCPF_EDC_TAG_CNT, 1430 mmCPG_EDC_ATC_CNT, 1431 mmCPG_EDC_DMA_CNT, 1432 mmCPG_EDC_TAG_CNT, 1433 mmDC_EDC_CSINVOC_CNT, 1434 mmDC_EDC_RESTORE_CNT, 1435 mmDC_EDC_STATE_CNT, 1436 mmGDS_EDC_CNT, 1437 mmGDS_EDC_GRBM_CNT, 1438 mmGDS_EDC_OA_DED, 1439 mmSPI_EDC_CNT, 1440 mmSQC_ATC_EDC_GATCL1_CNT, 1441 mmSQC_EDC_CNT, 1442 mmSQ_EDC_DED_CNT, 1443 mmSQ_EDC_INFO, 1444 mmSQ_EDC_SEC_CNT, 1445 mmTCC_EDC_CNT, 1446 mmTCP_ATC_EDC_GATCL1_CNT, 1447 mmTCP_EDC_CNT, 1448 mmTD_EDC_CNT 1449 }; 1450 1451 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 1452 { 1453 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 1454 struct amdgpu_ib ib; 1455 struct fence *f = NULL; 1456 int r, i; 1457 u32 tmp; 1458 unsigned total_size, vgpr_offset, sgpr_offset; 1459 u64 gpu_addr; 1460 1461 /* only supported on CZ */ 1462 if (adev->asic_type != CHIP_CARRIZO) 1463 return 0; 1464 1465 /* bail if the compute ring is not ready */ 1466 if (!ring->ready) 1467 return 0; 1468 1469 tmp = RREG32(mmGB_EDC_MODE); 1470 WREG32(mmGB_EDC_MODE, 0); 1471 1472 total_size = 1473 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1474 total_size += 1475 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1476 total_size += 1477 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1478 total_size = ALIGN(total_size, 256); 1479 vgpr_offset = total_size; 1480 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1481 sgpr_offset = total_size; 1482 total_size += sizeof(sgpr_init_compute_shader); 1483 1484 /* allocate an indirect buffer to put the commands in */ 1485 memset(&ib, 0, sizeof(ib)); 1486 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1487 if (r) { 1488 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1489 return r; 1490 } 1491 1492 /* load the compute shaders */ 1493 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1494 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1495 1496 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1497 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1498 1499 /* init the ib length to 0 */ 1500 ib.length_dw = 0; 1501 1502 /* VGPR */ 1503 /* write the register state for the compute dispatch */ 1504 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1505 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1506 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1507 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1508 } 1509 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1510 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1511 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1512 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1513 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1514 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1515 1516 /* write dispatch packet */ 1517 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1518 ib.ptr[ib.length_dw++] = 8; /* x */ 1519 ib.ptr[ib.length_dw++] = 1; /* y */ 1520 ib.ptr[ib.length_dw++] = 1; /* z */ 1521 ib.ptr[ib.length_dw++] = 1522 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1523 1524 /* write CS partial flush packet */ 1525 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1526 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1527 1528 /* SGPR1 */ 1529 /* write the register state for the compute dispatch */ 1530 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1531 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1532 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1533 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1534 } 1535 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1536 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1537 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1538 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1539 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1540 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1541 1542 /* write dispatch packet */ 1543 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1544 ib.ptr[ib.length_dw++] = 8; /* x */ 1545 ib.ptr[ib.length_dw++] = 1; /* y */ 1546 ib.ptr[ib.length_dw++] = 1; /* z */ 1547 ib.ptr[ib.length_dw++] = 1548 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1549 1550 /* write CS partial flush packet */ 1551 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1552 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1553 1554 /* SGPR2 */ 1555 /* write the register state for the compute dispatch */ 1556 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1557 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1558 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1559 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1560 } 1561 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1562 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1563 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1564 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1565 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1566 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1567 1568 /* write dispatch packet */ 1569 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1570 ib.ptr[ib.length_dw++] = 8; /* x */ 1571 ib.ptr[ib.length_dw++] = 1; /* y */ 1572 ib.ptr[ib.length_dw++] = 1; /* z */ 1573 ib.ptr[ib.length_dw++] = 1574 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1575 1576 /* write CS partial flush packet */ 1577 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1578 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1579 1580 /* shedule the ib on the ring */ 1581 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); 1582 if (r) { 1583 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1584 goto fail; 1585 } 1586 1587 /* wait for the GPU to finish processing the IB */ 1588 r = fence_wait(f, false); 1589 if (r) { 1590 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1591 goto fail; 1592 } 1593 1594 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1595 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1596 WREG32(mmGB_EDC_MODE, tmp); 1597 1598 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1599 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1600 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1601 1602 1603 /* read back registers to clear the counters */ 1604 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1605 RREG32(sec_ded_counter_registers[i]); 1606 1607 fail: 1608 fence_put(f); 1609 amdgpu_ib_free(adev, &ib, NULL); 1610 fence_put(f); 1611 1612 return r; 1613 } 1614 1615 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1616 { 1617 u32 gb_addr_config; 1618 u32 mc_shared_chmap, mc_arb_ramcfg; 1619 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1620 u32 tmp; 1621 int ret; 1622 1623 switch (adev->asic_type) { 1624 case CHIP_TOPAZ: 1625 adev->gfx.config.max_shader_engines = 1; 1626 adev->gfx.config.max_tile_pipes = 2; 1627 adev->gfx.config.max_cu_per_sh = 6; 1628 adev->gfx.config.max_sh_per_se = 1; 1629 adev->gfx.config.max_backends_per_se = 2; 1630 adev->gfx.config.max_texture_channel_caches = 2; 1631 adev->gfx.config.max_gprs = 256; 1632 adev->gfx.config.max_gs_threads = 32; 1633 adev->gfx.config.max_hw_contexts = 8; 1634 1635 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1636 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1637 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1638 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1639 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1640 break; 1641 case CHIP_FIJI: 1642 adev->gfx.config.max_shader_engines = 4; 1643 adev->gfx.config.max_tile_pipes = 16; 1644 adev->gfx.config.max_cu_per_sh = 16; 1645 adev->gfx.config.max_sh_per_se = 1; 1646 adev->gfx.config.max_backends_per_se = 4; 1647 adev->gfx.config.max_texture_channel_caches = 16; 1648 adev->gfx.config.max_gprs = 256; 1649 adev->gfx.config.max_gs_threads = 32; 1650 adev->gfx.config.max_hw_contexts = 8; 1651 1652 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1653 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1654 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1655 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1656 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1657 break; 1658 case CHIP_POLARIS11: 1659 ret = amdgpu_atombios_get_gfx_info(adev); 1660 if (ret) 1661 return ret; 1662 adev->gfx.config.max_gprs = 256; 1663 adev->gfx.config.max_gs_threads = 32; 1664 adev->gfx.config.max_hw_contexts = 8; 1665 1666 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1667 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1668 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1669 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1670 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1671 break; 1672 case CHIP_POLARIS10: 1673 ret = amdgpu_atombios_get_gfx_info(adev); 1674 if (ret) 1675 return ret; 1676 adev->gfx.config.max_gprs = 256; 1677 adev->gfx.config.max_gs_threads = 32; 1678 adev->gfx.config.max_hw_contexts = 8; 1679 1680 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1681 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1682 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1683 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1684 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1685 break; 1686 case CHIP_TONGA: 1687 adev->gfx.config.max_shader_engines = 4; 1688 adev->gfx.config.max_tile_pipes = 8; 1689 adev->gfx.config.max_cu_per_sh = 8; 1690 adev->gfx.config.max_sh_per_se = 1; 1691 adev->gfx.config.max_backends_per_se = 2; 1692 adev->gfx.config.max_texture_channel_caches = 8; 1693 adev->gfx.config.max_gprs = 256; 1694 adev->gfx.config.max_gs_threads = 32; 1695 adev->gfx.config.max_hw_contexts = 8; 1696 1697 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1698 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1699 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1700 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1701 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1702 break; 1703 case CHIP_CARRIZO: 1704 adev->gfx.config.max_shader_engines = 1; 1705 adev->gfx.config.max_tile_pipes = 2; 1706 adev->gfx.config.max_sh_per_se = 1; 1707 adev->gfx.config.max_backends_per_se = 2; 1708 1709 switch (adev->pdev->revision) { 1710 case 0xc4: 1711 case 0x84: 1712 case 0xc8: 1713 case 0xcc: 1714 case 0xe1: 1715 case 0xe3: 1716 /* B10 */ 1717 adev->gfx.config.max_cu_per_sh = 8; 1718 break; 1719 case 0xc5: 1720 case 0x81: 1721 case 0x85: 1722 case 0xc9: 1723 case 0xcd: 1724 case 0xe2: 1725 case 0xe4: 1726 /* B8 */ 1727 adev->gfx.config.max_cu_per_sh = 6; 1728 break; 1729 case 0xc6: 1730 case 0xca: 1731 case 0xce: 1732 case 0x88: 1733 /* B6 */ 1734 adev->gfx.config.max_cu_per_sh = 6; 1735 break; 1736 case 0xc7: 1737 case 0x87: 1738 case 0xcb: 1739 case 0xe5: 1740 case 0x89: 1741 default: 1742 /* B4 */ 1743 adev->gfx.config.max_cu_per_sh = 4; 1744 break; 1745 } 1746 1747 adev->gfx.config.max_texture_channel_caches = 2; 1748 adev->gfx.config.max_gprs = 256; 1749 adev->gfx.config.max_gs_threads = 32; 1750 adev->gfx.config.max_hw_contexts = 8; 1751 1752 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1753 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1754 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1755 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1756 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1757 break; 1758 case CHIP_STONEY: 1759 adev->gfx.config.max_shader_engines = 1; 1760 adev->gfx.config.max_tile_pipes = 2; 1761 adev->gfx.config.max_sh_per_se = 1; 1762 adev->gfx.config.max_backends_per_se = 1; 1763 1764 switch (adev->pdev->revision) { 1765 case 0xc0: 1766 case 0xc1: 1767 case 0xc2: 1768 case 0xc4: 1769 case 0xc8: 1770 case 0xc9: 1771 adev->gfx.config.max_cu_per_sh = 3; 1772 break; 1773 case 0xd0: 1774 case 0xd1: 1775 case 0xd2: 1776 default: 1777 adev->gfx.config.max_cu_per_sh = 2; 1778 break; 1779 } 1780 1781 adev->gfx.config.max_texture_channel_caches = 2; 1782 adev->gfx.config.max_gprs = 256; 1783 adev->gfx.config.max_gs_threads = 16; 1784 adev->gfx.config.max_hw_contexts = 8; 1785 1786 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1787 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1788 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1789 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1790 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1791 break; 1792 default: 1793 adev->gfx.config.max_shader_engines = 2; 1794 adev->gfx.config.max_tile_pipes = 4; 1795 adev->gfx.config.max_cu_per_sh = 2; 1796 adev->gfx.config.max_sh_per_se = 1; 1797 adev->gfx.config.max_backends_per_se = 2; 1798 adev->gfx.config.max_texture_channel_caches = 4; 1799 adev->gfx.config.max_gprs = 256; 1800 adev->gfx.config.max_gs_threads = 32; 1801 adev->gfx.config.max_hw_contexts = 8; 1802 1803 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1804 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1805 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1806 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1807 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1808 break; 1809 } 1810 1811 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1812 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1813 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1814 1815 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1816 adev->gfx.config.mem_max_burst_length_bytes = 256; 1817 if (adev->flags & AMD_IS_APU) { 1818 /* Get memory bank mapping mode. */ 1819 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 1820 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1821 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1822 1823 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 1824 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1825 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1826 1827 /* Validate settings in case only one DIMM installed. */ 1828 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 1829 dimm00_addr_map = 0; 1830 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 1831 dimm01_addr_map = 0; 1832 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 1833 dimm10_addr_map = 0; 1834 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 1835 dimm11_addr_map = 0; 1836 1837 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 1838 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */ 1839 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1840 adev->gfx.config.mem_row_size_in_kb = 2; 1841 else 1842 adev->gfx.config.mem_row_size_in_kb = 1; 1843 } else { 1844 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1845 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1846 if (adev->gfx.config.mem_row_size_in_kb > 4) 1847 adev->gfx.config.mem_row_size_in_kb = 4; 1848 } 1849 1850 adev->gfx.config.shader_engine_tile_size = 32; 1851 adev->gfx.config.num_gpus = 1; 1852 adev->gfx.config.multi_gpu_tile_size = 64; 1853 1854 /* fix up row size */ 1855 switch (adev->gfx.config.mem_row_size_in_kb) { 1856 case 1: 1857 default: 1858 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1859 break; 1860 case 2: 1861 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1862 break; 1863 case 4: 1864 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1865 break; 1866 } 1867 adev->gfx.config.gb_addr_config = gb_addr_config; 1868 1869 return 0; 1870 } 1871 1872 static int gfx_v8_0_sw_init(void *handle) 1873 { 1874 int i, r; 1875 struct amdgpu_ring *ring; 1876 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1877 1878 /* EOP Event */ 1879 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); 1880 if (r) 1881 return r; 1882 1883 /* Privileged reg */ 1884 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq); 1885 if (r) 1886 return r; 1887 1888 /* Privileged inst */ 1889 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq); 1890 if (r) 1891 return r; 1892 1893 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1894 1895 gfx_v8_0_scratch_init(adev); 1896 1897 r = gfx_v8_0_init_microcode(adev); 1898 if (r) { 1899 DRM_ERROR("Failed to load gfx firmware!\n"); 1900 return r; 1901 } 1902 1903 r = gfx_v8_0_rlc_init(adev); 1904 if (r) { 1905 DRM_ERROR("Failed to init rlc BOs!\n"); 1906 return r; 1907 } 1908 1909 r = gfx_v8_0_mec_init(adev); 1910 if (r) { 1911 DRM_ERROR("Failed to init MEC BOs!\n"); 1912 return r; 1913 } 1914 1915 /* set up the gfx ring */ 1916 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1917 ring = &adev->gfx.gfx_ring[i]; 1918 ring->ring_obj = NULL; 1919 sprintf(ring->name, "gfx"); 1920 /* no gfx doorbells on iceland */ 1921 if (adev->asic_type != CHIP_TOPAZ) { 1922 ring->use_doorbell = true; 1923 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 1924 } 1925 1926 r = amdgpu_ring_init(adev, ring, 1024, 1927 PACKET3(PACKET3_NOP, 0x3FFF), 0xf, 1928 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP, 1929 AMDGPU_RING_TYPE_GFX); 1930 if (r) 1931 return r; 1932 } 1933 1934 /* set up the compute queues */ 1935 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 1936 unsigned irq_type; 1937 1938 /* max 32 queues per MEC */ 1939 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { 1940 DRM_ERROR("Too many (%d) compute rings!\n", i); 1941 break; 1942 } 1943 ring = &adev->gfx.compute_ring[i]; 1944 ring->ring_obj = NULL; 1945 ring->use_doorbell = true; 1946 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; 1947 ring->me = 1; /* first MEC */ 1948 ring->pipe = i / 8; 1949 ring->queue = i % 8; 1950 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1951 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; 1952 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1953 r = amdgpu_ring_init(adev, ring, 1024, 1954 PACKET3(PACKET3_NOP, 0x3FFF), 0xf, 1955 &adev->gfx.eop_irq, irq_type, 1956 AMDGPU_RING_TYPE_COMPUTE); 1957 if (r) 1958 return r; 1959 } 1960 1961 /* reserve GDS, GWS and OA resource for gfx */ 1962 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, 1963 PAGE_SIZE, true, 1964 AMDGPU_GEM_DOMAIN_GDS, 0, NULL, 1965 NULL, &adev->gds.gds_gfx_bo); 1966 if (r) 1967 return r; 1968 1969 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, 1970 PAGE_SIZE, true, 1971 AMDGPU_GEM_DOMAIN_GWS, 0, NULL, 1972 NULL, &adev->gds.gws_gfx_bo); 1973 if (r) 1974 return r; 1975 1976 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size, 1977 PAGE_SIZE, true, 1978 AMDGPU_GEM_DOMAIN_OA, 0, NULL, 1979 NULL, &adev->gds.oa_gfx_bo); 1980 if (r) 1981 return r; 1982 1983 adev->gfx.ce_ram_size = 0x8000; 1984 1985 r = gfx_v8_0_gpu_early_init(adev); 1986 if (r) 1987 return r; 1988 1989 return 0; 1990 } 1991 1992 static int gfx_v8_0_sw_fini(void *handle) 1993 { 1994 int i; 1995 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 1996 1997 amdgpu_bo_unref(&adev->gds.oa_gfx_bo); 1998 amdgpu_bo_unref(&adev->gds.gws_gfx_bo); 1999 amdgpu_bo_unref(&adev->gds.gds_gfx_bo); 2000 2001 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2002 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2003 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2004 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2005 2006 gfx_v8_0_mec_fini(adev); 2007 2008 gfx_v8_0_rlc_fini(adev); 2009 2010 gfx_v8_0_free_microcode(adev); 2011 2012 return 0; 2013 } 2014 2015 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2016 { 2017 uint32_t *modearray, *mod2array; 2018 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2019 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2020 u32 reg_offset; 2021 2022 modearray = adev->gfx.config.tile_mode_array; 2023 mod2array = adev->gfx.config.macrotile_mode_array; 2024 2025 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2026 modearray[reg_offset] = 0; 2027 2028 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2029 mod2array[reg_offset] = 0; 2030 2031 switch (adev->asic_type) { 2032 case CHIP_TOPAZ: 2033 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2034 PIPE_CONFIG(ADDR_SURF_P2) | 2035 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2036 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2037 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2038 PIPE_CONFIG(ADDR_SURF_P2) | 2039 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2040 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2041 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2042 PIPE_CONFIG(ADDR_SURF_P2) | 2043 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2044 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2045 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2046 PIPE_CONFIG(ADDR_SURF_P2) | 2047 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2048 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2049 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2050 PIPE_CONFIG(ADDR_SURF_P2) | 2051 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2052 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2053 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2054 PIPE_CONFIG(ADDR_SURF_P2) | 2055 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2056 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2057 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2058 PIPE_CONFIG(ADDR_SURF_P2) | 2059 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2060 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2061 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2062 PIPE_CONFIG(ADDR_SURF_P2)); 2063 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2064 PIPE_CONFIG(ADDR_SURF_P2) | 2065 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2066 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2067 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2068 PIPE_CONFIG(ADDR_SURF_P2) | 2069 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2071 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2072 PIPE_CONFIG(ADDR_SURF_P2) | 2073 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2075 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2076 PIPE_CONFIG(ADDR_SURF_P2) | 2077 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2079 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2080 PIPE_CONFIG(ADDR_SURF_P2) | 2081 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2083 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2084 PIPE_CONFIG(ADDR_SURF_P2) | 2085 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2087 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2088 PIPE_CONFIG(ADDR_SURF_P2) | 2089 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2091 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2092 PIPE_CONFIG(ADDR_SURF_P2) | 2093 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2095 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2096 PIPE_CONFIG(ADDR_SURF_P2) | 2097 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2099 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2100 PIPE_CONFIG(ADDR_SURF_P2) | 2101 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2103 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2104 PIPE_CONFIG(ADDR_SURF_P2) | 2105 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2107 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2108 PIPE_CONFIG(ADDR_SURF_P2) | 2109 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2111 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2112 PIPE_CONFIG(ADDR_SURF_P2) | 2113 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2115 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2116 PIPE_CONFIG(ADDR_SURF_P2) | 2117 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2119 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2120 PIPE_CONFIG(ADDR_SURF_P2) | 2121 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2123 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2124 PIPE_CONFIG(ADDR_SURF_P2) | 2125 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2127 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2128 PIPE_CONFIG(ADDR_SURF_P2) | 2129 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2131 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2132 PIPE_CONFIG(ADDR_SURF_P2) | 2133 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2135 2136 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2137 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2138 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2139 NUM_BANKS(ADDR_SURF_8_BANK)); 2140 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2143 NUM_BANKS(ADDR_SURF_8_BANK)); 2144 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2146 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2147 NUM_BANKS(ADDR_SURF_8_BANK)); 2148 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2149 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2150 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2151 NUM_BANKS(ADDR_SURF_8_BANK)); 2152 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2155 NUM_BANKS(ADDR_SURF_8_BANK)); 2156 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2157 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2158 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2159 NUM_BANKS(ADDR_SURF_8_BANK)); 2160 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2163 NUM_BANKS(ADDR_SURF_8_BANK)); 2164 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2167 NUM_BANKS(ADDR_SURF_16_BANK)); 2168 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2169 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2170 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2171 NUM_BANKS(ADDR_SURF_16_BANK)); 2172 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2175 NUM_BANKS(ADDR_SURF_16_BANK)); 2176 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2177 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2178 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2179 NUM_BANKS(ADDR_SURF_16_BANK)); 2180 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2183 NUM_BANKS(ADDR_SURF_16_BANK)); 2184 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2187 NUM_BANKS(ADDR_SURF_16_BANK)); 2188 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2191 NUM_BANKS(ADDR_SURF_8_BANK)); 2192 2193 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2194 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2195 reg_offset != 23) 2196 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2197 2198 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2199 if (reg_offset != 7) 2200 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2201 2202 break; 2203 case CHIP_FIJI: 2204 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2205 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2206 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2207 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2208 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2209 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2210 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2211 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2212 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2213 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2214 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2215 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2216 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2217 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2218 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2220 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2221 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2222 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2223 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2224 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2225 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2226 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2227 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2228 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2229 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2232 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2233 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2236 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2237 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2238 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2239 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2240 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2242 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2243 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2244 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2246 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2247 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2248 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2250 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2251 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2252 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2254 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2255 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2256 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2258 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2259 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2260 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2262 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2263 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2264 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2266 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2267 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2268 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2270 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2271 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2274 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2275 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2278 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2279 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2280 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2282 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2283 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2284 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2286 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2287 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2290 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2291 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2294 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2295 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2298 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2299 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2302 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2303 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2306 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2307 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2310 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2311 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2312 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2314 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2315 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2316 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2318 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2319 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2320 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2322 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2323 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2324 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2326 2327 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2330 NUM_BANKS(ADDR_SURF_8_BANK)); 2331 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2334 NUM_BANKS(ADDR_SURF_8_BANK)); 2335 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2338 NUM_BANKS(ADDR_SURF_8_BANK)); 2339 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2342 NUM_BANKS(ADDR_SURF_8_BANK)); 2343 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2346 NUM_BANKS(ADDR_SURF_8_BANK)); 2347 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2350 NUM_BANKS(ADDR_SURF_8_BANK)); 2351 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2354 NUM_BANKS(ADDR_SURF_8_BANK)); 2355 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2358 NUM_BANKS(ADDR_SURF_8_BANK)); 2359 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2362 NUM_BANKS(ADDR_SURF_8_BANK)); 2363 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2366 NUM_BANKS(ADDR_SURF_8_BANK)); 2367 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2370 NUM_BANKS(ADDR_SURF_8_BANK)); 2371 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2374 NUM_BANKS(ADDR_SURF_8_BANK)); 2375 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2378 NUM_BANKS(ADDR_SURF_8_BANK)); 2379 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2382 NUM_BANKS(ADDR_SURF_4_BANK)); 2383 2384 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2385 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2386 2387 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2388 if (reg_offset != 7) 2389 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2390 2391 break; 2392 case CHIP_TONGA: 2393 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2394 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2395 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2396 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2397 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2398 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2401 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2402 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2403 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2404 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2405 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2406 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2409 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2410 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2412 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2413 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2414 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2417 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2418 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2419 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2420 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2421 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2422 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2423 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2424 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2425 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2426 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2427 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2428 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2429 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2431 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2432 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2435 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2436 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2439 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2440 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2443 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2444 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2445 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2447 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2448 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2449 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2451 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2452 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2453 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2455 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2456 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2457 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2459 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2460 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2461 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2463 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2464 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2465 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2467 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2468 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2469 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2471 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2472 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2475 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2476 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2477 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2479 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2480 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2481 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2483 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2484 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2485 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2487 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2488 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2489 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2491 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2492 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2493 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2494 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2495 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2496 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2497 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2498 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2499 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2500 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2501 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2502 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2503 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2504 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2505 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2506 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2507 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2508 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2509 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2510 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2511 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2512 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2513 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2514 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2515 2516 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2519 NUM_BANKS(ADDR_SURF_16_BANK)); 2520 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2523 NUM_BANKS(ADDR_SURF_16_BANK)); 2524 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2527 NUM_BANKS(ADDR_SURF_16_BANK)); 2528 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2531 NUM_BANKS(ADDR_SURF_16_BANK)); 2532 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2535 NUM_BANKS(ADDR_SURF_16_BANK)); 2536 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2537 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2539 NUM_BANKS(ADDR_SURF_16_BANK)); 2540 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2543 NUM_BANKS(ADDR_SURF_16_BANK)); 2544 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2547 NUM_BANKS(ADDR_SURF_16_BANK)); 2548 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2549 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2550 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2551 NUM_BANKS(ADDR_SURF_16_BANK)); 2552 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2553 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2554 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2555 NUM_BANKS(ADDR_SURF_16_BANK)); 2556 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2557 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2558 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2559 NUM_BANKS(ADDR_SURF_16_BANK)); 2560 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2561 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2562 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2563 NUM_BANKS(ADDR_SURF_8_BANK)); 2564 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2565 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2566 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2567 NUM_BANKS(ADDR_SURF_4_BANK)); 2568 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2569 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2570 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2571 NUM_BANKS(ADDR_SURF_4_BANK)); 2572 2573 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2574 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2575 2576 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2577 if (reg_offset != 7) 2578 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2579 2580 break; 2581 case CHIP_POLARIS11: 2582 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2583 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2584 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2585 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2586 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2587 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2588 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2589 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2590 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2591 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2592 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2593 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2594 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2595 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2596 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2597 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2598 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2599 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2600 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2601 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2602 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2603 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2604 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2605 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2606 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2607 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2608 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2609 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2610 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2611 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2612 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2613 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2614 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2615 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2616 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2617 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2618 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2619 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2620 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2621 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2622 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2624 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2625 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2626 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2627 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2628 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2629 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2630 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2631 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2632 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2633 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2634 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2635 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2636 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2637 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2638 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2639 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2640 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2641 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2642 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2643 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2644 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2645 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2646 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2647 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2648 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2649 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2650 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2651 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2652 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2653 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2654 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2655 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2656 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2657 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2658 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2659 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2660 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2661 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2662 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2663 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2664 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2665 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2666 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2667 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2668 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2669 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2670 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2671 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2672 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2673 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2674 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2675 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2676 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2677 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2678 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2679 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2680 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2681 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2682 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2683 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2684 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2685 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2686 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2687 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2688 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2689 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2690 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2691 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2692 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2693 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2694 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2695 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2696 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2697 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2698 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2699 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2700 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2701 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2702 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2703 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2704 2705 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2706 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2707 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2708 NUM_BANKS(ADDR_SURF_16_BANK)); 2709 2710 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2711 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2712 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2713 NUM_BANKS(ADDR_SURF_16_BANK)); 2714 2715 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2716 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2717 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2718 NUM_BANKS(ADDR_SURF_16_BANK)); 2719 2720 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2721 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2722 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2723 NUM_BANKS(ADDR_SURF_16_BANK)); 2724 2725 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2726 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2727 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2728 NUM_BANKS(ADDR_SURF_16_BANK)); 2729 2730 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2731 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2732 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2733 NUM_BANKS(ADDR_SURF_16_BANK)); 2734 2735 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2736 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2737 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2738 NUM_BANKS(ADDR_SURF_16_BANK)); 2739 2740 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2741 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2742 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2743 NUM_BANKS(ADDR_SURF_16_BANK)); 2744 2745 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2746 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2747 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2748 NUM_BANKS(ADDR_SURF_16_BANK)); 2749 2750 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2751 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2752 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2753 NUM_BANKS(ADDR_SURF_16_BANK)); 2754 2755 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2758 NUM_BANKS(ADDR_SURF_16_BANK)); 2759 2760 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2761 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2762 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2763 NUM_BANKS(ADDR_SURF_16_BANK)); 2764 2765 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2766 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2767 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2768 NUM_BANKS(ADDR_SURF_8_BANK)); 2769 2770 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2771 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2772 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2773 NUM_BANKS(ADDR_SURF_4_BANK)); 2774 2775 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2776 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2777 2778 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2779 if (reg_offset != 7) 2780 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2781 2782 break; 2783 case CHIP_POLARIS10: 2784 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2785 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2786 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2787 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2788 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2789 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2790 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2791 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2792 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2793 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2794 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2795 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2796 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2797 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2798 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2799 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2800 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2801 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2802 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2803 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2804 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2805 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2806 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2807 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2808 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2809 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2810 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2811 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2812 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2813 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2814 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2815 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2816 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2817 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2818 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2819 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2821 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2822 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2823 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2825 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2826 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2827 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2829 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2830 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2831 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2832 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2833 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2834 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2835 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2836 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2837 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2838 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2839 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2840 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2841 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2842 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2843 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2844 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2845 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2846 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2847 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2848 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2849 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2850 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2851 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2852 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2853 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2854 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2855 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2856 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2857 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2858 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2859 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2860 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2861 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2862 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2863 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2864 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2865 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2866 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2867 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2868 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2869 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2870 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2871 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2872 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2873 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2874 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2875 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2876 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2877 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2878 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2879 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2880 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2881 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2882 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2883 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2884 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2885 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2886 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2887 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2888 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2889 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2890 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2891 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2892 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2893 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2894 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2895 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2896 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2897 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2898 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2899 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2900 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2901 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2902 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2903 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2904 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2905 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2906 2907 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2908 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2909 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2910 NUM_BANKS(ADDR_SURF_16_BANK)); 2911 2912 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2913 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2914 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2915 NUM_BANKS(ADDR_SURF_16_BANK)); 2916 2917 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2918 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2919 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2920 NUM_BANKS(ADDR_SURF_16_BANK)); 2921 2922 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2923 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2924 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2925 NUM_BANKS(ADDR_SURF_16_BANK)); 2926 2927 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2928 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2929 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2930 NUM_BANKS(ADDR_SURF_16_BANK)); 2931 2932 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2933 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2934 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2935 NUM_BANKS(ADDR_SURF_16_BANK)); 2936 2937 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2938 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2939 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2940 NUM_BANKS(ADDR_SURF_16_BANK)); 2941 2942 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2943 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2944 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2945 NUM_BANKS(ADDR_SURF_16_BANK)); 2946 2947 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2948 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2949 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2950 NUM_BANKS(ADDR_SURF_16_BANK)); 2951 2952 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2955 NUM_BANKS(ADDR_SURF_16_BANK)); 2956 2957 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2960 NUM_BANKS(ADDR_SURF_16_BANK)); 2961 2962 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2965 NUM_BANKS(ADDR_SURF_8_BANK)); 2966 2967 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2968 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2969 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2970 NUM_BANKS(ADDR_SURF_4_BANK)); 2971 2972 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2975 NUM_BANKS(ADDR_SURF_4_BANK)); 2976 2977 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2978 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2979 2980 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2981 if (reg_offset != 7) 2982 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2983 2984 break; 2985 case CHIP_STONEY: 2986 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2987 PIPE_CONFIG(ADDR_SURF_P2) | 2988 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2989 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2990 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2991 PIPE_CONFIG(ADDR_SURF_P2) | 2992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2993 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2994 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2995 PIPE_CONFIG(ADDR_SURF_P2) | 2996 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2997 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2998 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2999 PIPE_CONFIG(ADDR_SURF_P2) | 3000 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3001 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3002 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3003 PIPE_CONFIG(ADDR_SURF_P2) | 3004 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3005 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3006 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3007 PIPE_CONFIG(ADDR_SURF_P2) | 3008 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3009 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3010 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3011 PIPE_CONFIG(ADDR_SURF_P2) | 3012 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3013 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3014 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3015 PIPE_CONFIG(ADDR_SURF_P2)); 3016 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3017 PIPE_CONFIG(ADDR_SURF_P2) | 3018 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3019 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3020 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3021 PIPE_CONFIG(ADDR_SURF_P2) | 3022 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3024 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3025 PIPE_CONFIG(ADDR_SURF_P2) | 3026 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3027 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3028 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3029 PIPE_CONFIG(ADDR_SURF_P2) | 3030 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3031 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3032 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3033 PIPE_CONFIG(ADDR_SURF_P2) | 3034 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3035 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3036 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3037 PIPE_CONFIG(ADDR_SURF_P2) | 3038 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3040 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3041 PIPE_CONFIG(ADDR_SURF_P2) | 3042 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3043 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3044 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3045 PIPE_CONFIG(ADDR_SURF_P2) | 3046 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3047 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3048 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3049 PIPE_CONFIG(ADDR_SURF_P2) | 3050 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3051 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3052 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3053 PIPE_CONFIG(ADDR_SURF_P2) | 3054 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3055 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3056 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3057 PIPE_CONFIG(ADDR_SURF_P2) | 3058 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3060 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3061 PIPE_CONFIG(ADDR_SURF_P2) | 3062 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3063 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3064 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3065 PIPE_CONFIG(ADDR_SURF_P2) | 3066 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3067 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3068 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3069 PIPE_CONFIG(ADDR_SURF_P2) | 3070 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3072 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3073 PIPE_CONFIG(ADDR_SURF_P2) | 3074 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3075 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3076 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3077 PIPE_CONFIG(ADDR_SURF_P2) | 3078 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3079 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3080 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3081 PIPE_CONFIG(ADDR_SURF_P2) | 3082 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3084 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3085 PIPE_CONFIG(ADDR_SURF_P2) | 3086 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3087 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3088 3089 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3090 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3091 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3092 NUM_BANKS(ADDR_SURF_8_BANK)); 3093 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3096 NUM_BANKS(ADDR_SURF_8_BANK)); 3097 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3098 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3099 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3100 NUM_BANKS(ADDR_SURF_8_BANK)); 3101 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3102 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3103 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3104 NUM_BANKS(ADDR_SURF_8_BANK)); 3105 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3106 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3107 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3108 NUM_BANKS(ADDR_SURF_8_BANK)); 3109 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3110 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3111 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3112 NUM_BANKS(ADDR_SURF_8_BANK)); 3113 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3114 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3115 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3116 NUM_BANKS(ADDR_SURF_8_BANK)); 3117 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3120 NUM_BANKS(ADDR_SURF_16_BANK)); 3121 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3122 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3123 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3124 NUM_BANKS(ADDR_SURF_16_BANK)); 3125 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3126 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3127 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3128 NUM_BANKS(ADDR_SURF_16_BANK)); 3129 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3130 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3131 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3132 NUM_BANKS(ADDR_SURF_16_BANK)); 3133 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3134 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3135 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3136 NUM_BANKS(ADDR_SURF_16_BANK)); 3137 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3138 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3139 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3140 NUM_BANKS(ADDR_SURF_16_BANK)); 3141 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3142 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3143 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3144 NUM_BANKS(ADDR_SURF_8_BANK)); 3145 3146 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3147 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3148 reg_offset != 23) 3149 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3150 3151 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3152 if (reg_offset != 7) 3153 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3154 3155 break; 3156 default: 3157 dev_warn(adev->dev, 3158 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3159 adev->asic_type); 3160 3161 case CHIP_CARRIZO: 3162 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3163 PIPE_CONFIG(ADDR_SURF_P2) | 3164 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3165 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3166 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3167 PIPE_CONFIG(ADDR_SURF_P2) | 3168 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3169 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3170 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3171 PIPE_CONFIG(ADDR_SURF_P2) | 3172 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3173 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3174 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3175 PIPE_CONFIG(ADDR_SURF_P2) | 3176 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3177 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3178 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3179 PIPE_CONFIG(ADDR_SURF_P2) | 3180 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3181 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3182 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3183 PIPE_CONFIG(ADDR_SURF_P2) | 3184 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3185 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3186 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3187 PIPE_CONFIG(ADDR_SURF_P2) | 3188 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3189 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3190 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3191 PIPE_CONFIG(ADDR_SURF_P2)); 3192 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3193 PIPE_CONFIG(ADDR_SURF_P2) | 3194 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3195 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3196 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3197 PIPE_CONFIG(ADDR_SURF_P2) | 3198 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3199 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3200 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3201 PIPE_CONFIG(ADDR_SURF_P2) | 3202 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3203 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3204 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3205 PIPE_CONFIG(ADDR_SURF_P2) | 3206 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3207 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3208 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3209 PIPE_CONFIG(ADDR_SURF_P2) | 3210 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3211 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3212 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3213 PIPE_CONFIG(ADDR_SURF_P2) | 3214 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3215 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3216 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3217 PIPE_CONFIG(ADDR_SURF_P2) | 3218 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3219 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3220 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3221 PIPE_CONFIG(ADDR_SURF_P2) | 3222 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3223 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3224 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3225 PIPE_CONFIG(ADDR_SURF_P2) | 3226 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3227 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3228 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3229 PIPE_CONFIG(ADDR_SURF_P2) | 3230 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3231 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3232 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3233 PIPE_CONFIG(ADDR_SURF_P2) | 3234 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3235 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3236 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3237 PIPE_CONFIG(ADDR_SURF_P2) | 3238 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3239 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3240 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3241 PIPE_CONFIG(ADDR_SURF_P2) | 3242 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3244 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3245 PIPE_CONFIG(ADDR_SURF_P2) | 3246 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3248 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3249 PIPE_CONFIG(ADDR_SURF_P2) | 3250 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3252 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3253 PIPE_CONFIG(ADDR_SURF_P2) | 3254 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3256 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3257 PIPE_CONFIG(ADDR_SURF_P2) | 3258 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3260 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3261 PIPE_CONFIG(ADDR_SURF_P2) | 3262 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3264 3265 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3266 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3267 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3268 NUM_BANKS(ADDR_SURF_8_BANK)); 3269 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3270 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3271 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3272 NUM_BANKS(ADDR_SURF_8_BANK)); 3273 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3274 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3275 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3276 NUM_BANKS(ADDR_SURF_8_BANK)); 3277 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3278 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3279 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3280 NUM_BANKS(ADDR_SURF_8_BANK)); 3281 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3282 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3283 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3284 NUM_BANKS(ADDR_SURF_8_BANK)); 3285 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3286 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3287 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3288 NUM_BANKS(ADDR_SURF_8_BANK)); 3289 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3290 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3291 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3292 NUM_BANKS(ADDR_SURF_8_BANK)); 3293 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3294 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3295 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3296 NUM_BANKS(ADDR_SURF_16_BANK)); 3297 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3300 NUM_BANKS(ADDR_SURF_16_BANK)); 3301 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3302 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3303 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3304 NUM_BANKS(ADDR_SURF_16_BANK)); 3305 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3306 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3307 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3308 NUM_BANKS(ADDR_SURF_16_BANK)); 3309 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3312 NUM_BANKS(ADDR_SURF_16_BANK)); 3313 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3314 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3315 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3316 NUM_BANKS(ADDR_SURF_16_BANK)); 3317 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3318 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3319 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3320 NUM_BANKS(ADDR_SURF_8_BANK)); 3321 3322 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3323 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3324 reg_offset != 23) 3325 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3326 3327 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3328 if (reg_offset != 7) 3329 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3330 3331 break; 3332 } 3333 } 3334 3335 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) 3336 { 3337 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3338 3339 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) { 3340 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3341 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3342 } else if (se_num == 0xffffffff) { 3343 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3344 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3345 } else if (sh_num == 0xffffffff) { 3346 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3347 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3348 } else { 3349 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3350 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3351 } 3352 WREG32(mmGRBM_GFX_INDEX, data); 3353 } 3354 3355 static u32 gfx_v8_0_create_bitmask(u32 bit_width) 3356 { 3357 return (u32)((1ULL << bit_width) - 1); 3358 } 3359 3360 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3361 { 3362 u32 data, mask; 3363 3364 data = RREG32(mmCC_RB_BACKEND_DISABLE); 3365 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3366 3367 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; 3368 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; 3369 3370 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / 3371 adev->gfx.config.max_sh_per_se); 3372 3373 return (~data) & mask; 3374 } 3375 3376 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3377 { 3378 int i, j; 3379 u32 data; 3380 u32 active_rbs = 0; 3381 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3382 adev->gfx.config.max_sh_per_se; 3383 3384 mutex_lock(&adev->grbm_idx_mutex); 3385 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3386 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3387 gfx_v8_0_select_se_sh(adev, i, j); 3388 data = gfx_v8_0_get_rb_active_bitmap(adev); 3389 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 3390 rb_bitmap_width_per_sh); 3391 } 3392 } 3393 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 3394 mutex_unlock(&adev->grbm_idx_mutex); 3395 3396 adev->gfx.config.backend_enable_mask = active_rbs; 3397 adev->gfx.config.num_rbs = hweight32(active_rbs); 3398 } 3399 3400 /** 3401 * gfx_v8_0_init_compute_vmid - gart enable 3402 * 3403 * @rdev: amdgpu_device pointer 3404 * 3405 * Initialize compute vmid sh_mem registers 3406 * 3407 */ 3408 #define DEFAULT_SH_MEM_BASES (0x6000) 3409 #define FIRST_COMPUTE_VMID (8) 3410 #define LAST_COMPUTE_VMID (16) 3411 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) 3412 { 3413 int i; 3414 uint32_t sh_mem_config; 3415 uint32_t sh_mem_bases; 3416 3417 /* 3418 * Configure apertures: 3419 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 3420 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 3421 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 3422 */ 3423 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 3424 3425 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << 3426 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | 3427 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 3428 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | 3429 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 3430 SH_MEM_CONFIG__PRIVATE_ATC_MASK; 3431 3432 mutex_lock(&adev->srbm_mutex); 3433 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 3434 vi_srbm_select(adev, 0, 0, 0, i); 3435 /* CP and shaders */ 3436 WREG32(mmSH_MEM_CONFIG, sh_mem_config); 3437 WREG32(mmSH_MEM_APE1_BASE, 1); 3438 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3439 WREG32(mmSH_MEM_BASES, sh_mem_bases); 3440 } 3441 vi_srbm_select(adev, 0, 0, 0, 0); 3442 mutex_unlock(&adev->srbm_mutex); 3443 } 3444 3445 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) 3446 { 3447 u32 tmp; 3448 int i; 3449 3450 tmp = RREG32(mmGRBM_CNTL); 3451 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff); 3452 WREG32(mmGRBM_CNTL, tmp); 3453 3454 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3455 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3456 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 3457 3458 gfx_v8_0_tiling_mode_table_init(adev); 3459 3460 gfx_v8_0_setup_rb(adev); 3461 gfx_v8_0_get_cu_info(adev); 3462 3463 /* XXX SH_MEM regs */ 3464 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3465 mutex_lock(&adev->srbm_mutex); 3466 for (i = 0; i < 16; i++) { 3467 vi_srbm_select(adev, 0, 0, 0, i); 3468 /* CP and shaders */ 3469 if (i == 0) { 3470 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 3471 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3472 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3473 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3474 WREG32(mmSH_MEM_CONFIG, tmp); 3475 } else { 3476 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 3477 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC); 3478 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3479 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3480 WREG32(mmSH_MEM_CONFIG, tmp); 3481 } 3482 3483 WREG32(mmSH_MEM_APE1_BASE, 1); 3484 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3485 WREG32(mmSH_MEM_BASES, 0); 3486 } 3487 vi_srbm_select(adev, 0, 0, 0, 0); 3488 mutex_unlock(&adev->srbm_mutex); 3489 3490 gfx_v8_0_init_compute_vmid(adev); 3491 3492 mutex_lock(&adev->grbm_idx_mutex); 3493 /* 3494 * making sure that the following register writes will be broadcasted 3495 * to all the shaders 3496 */ 3497 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 3498 3499 WREG32(mmPA_SC_FIFO_SIZE, 3500 (adev->gfx.config.sc_prim_fifo_size_frontend << 3501 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 3502 (adev->gfx.config.sc_prim_fifo_size_backend << 3503 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 3504 (adev->gfx.config.sc_hiz_tile_fifo_size << 3505 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 3506 (adev->gfx.config.sc_earlyz_tile_fifo_size << 3507 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3508 mutex_unlock(&adev->grbm_idx_mutex); 3509 3510 } 3511 3512 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 3513 { 3514 u32 i, j, k; 3515 u32 mask; 3516 3517 mutex_lock(&adev->grbm_idx_mutex); 3518 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3519 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3520 gfx_v8_0_select_se_sh(adev, i, j); 3521 for (k = 0; k < adev->usec_timeout; k++) { 3522 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) 3523 break; 3524 udelay(1); 3525 } 3526 } 3527 } 3528 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 3529 mutex_unlock(&adev->grbm_idx_mutex); 3530 3531 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 3532 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 3533 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 3534 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 3535 for (k = 0; k < adev->usec_timeout; k++) { 3536 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 3537 break; 3538 udelay(1); 3539 } 3540 } 3541 3542 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 3543 bool enable) 3544 { 3545 u32 tmp = RREG32(mmCP_INT_CNTL_RING0); 3546 3547 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 3548 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 3549 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 3550 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 3551 3552 WREG32(mmCP_INT_CNTL_RING0, tmp); 3553 } 3554 3555 static void gfx_v8_0_init_csb(struct amdgpu_device *adev) 3556 { 3557 /* csib */ 3558 WREG32(mmRLC_CSIB_ADDR_HI, 3559 adev->gfx.rlc.clear_state_gpu_addr >> 32); 3560 WREG32(mmRLC_CSIB_ADDR_LO, 3561 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 3562 WREG32(mmRLC_CSIB_LENGTH, 3563 adev->gfx.rlc.clear_state_size); 3564 } 3565 3566 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, 3567 int ind_offset, 3568 int list_size, 3569 int *unique_indices, 3570 int *indices_count, 3571 int max_indices, 3572 int *ind_start_offsets, 3573 int *offset_count, 3574 int max_offset) 3575 { 3576 int indices; 3577 bool new_entry = true; 3578 3579 for (; ind_offset < list_size; ind_offset++) { 3580 3581 if (new_entry) { 3582 new_entry = false; 3583 ind_start_offsets[*offset_count] = ind_offset; 3584 *offset_count = *offset_count + 1; 3585 BUG_ON(*offset_count >= max_offset); 3586 } 3587 3588 if (register_list_format[ind_offset] == 0xFFFFFFFF) { 3589 new_entry = true; 3590 continue; 3591 } 3592 3593 ind_offset += 2; 3594 3595 /* look for the matching indice */ 3596 for (indices = 0; 3597 indices < *indices_count; 3598 indices++) { 3599 if (unique_indices[indices] == 3600 register_list_format[ind_offset]) 3601 break; 3602 } 3603 3604 if (indices >= *indices_count) { 3605 unique_indices[*indices_count] = 3606 register_list_format[ind_offset]; 3607 indices = *indices_count; 3608 *indices_count = *indices_count + 1; 3609 BUG_ON(*indices_count >= max_indices); 3610 } 3611 3612 register_list_format[ind_offset] = indices; 3613 } 3614 } 3615 3616 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) 3617 { 3618 int i, temp, data; 3619 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; 3620 int indices_count = 0; 3621 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 3622 int offset_count = 0; 3623 3624 int list_size; 3625 unsigned int *register_list_format = 3626 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 3627 if (register_list_format == NULL) 3628 return -ENOMEM; 3629 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 3630 adev->gfx.rlc.reg_list_format_size_bytes); 3631 3632 gfx_v8_0_parse_ind_reg_list(register_list_format, 3633 RLC_FormatDirectRegListLength, 3634 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 3635 unique_indices, 3636 &indices_count, 3637 sizeof(unique_indices) / sizeof(int), 3638 indirect_start_offsets, 3639 &offset_count, 3640 sizeof(indirect_start_offsets)/sizeof(int)); 3641 3642 /* save and restore list */ 3643 temp = RREG32(mmRLC_SRM_CNTL); 3644 temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; 3645 WREG32(mmRLC_SRM_CNTL, temp); 3646 3647 WREG32(mmRLC_SRM_ARAM_ADDR, 0); 3648 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 3649 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); 3650 3651 /* indirect list */ 3652 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); 3653 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) 3654 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); 3655 3656 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 3657 list_size = list_size >> 1; 3658 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); 3659 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); 3660 3661 /* starting offsets starts */ 3662 WREG32(mmRLC_GPM_SCRATCH_ADDR, 3663 adev->gfx.rlc.starting_offsets_start); 3664 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) 3665 WREG32(mmRLC_GPM_SCRATCH_DATA, 3666 indirect_start_offsets[i]); 3667 3668 /* unique indices */ 3669 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; 3670 data = mmRLC_SRM_INDEX_CNTL_DATA_0; 3671 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { 3672 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false); 3673 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false); 3674 } 3675 kfree(register_list_format); 3676 3677 return 0; 3678 } 3679 3680 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) 3681 { 3682 uint32_t data; 3683 3684 data = RREG32(mmRLC_SRM_CNTL); 3685 data |= RLC_SRM_CNTL__SRM_ENABLE_MASK; 3686 WREG32(mmRLC_SRM_CNTL, data); 3687 } 3688 3689 static void polaris11_init_power_gating(struct amdgpu_device *adev) 3690 { 3691 uint32_t data; 3692 3693 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3694 AMD_PG_SUPPORT_GFX_SMG | 3695 AMD_PG_SUPPORT_GFX_DMG)) { 3696 data = RREG32(mmCP_RB_WPTR_POLL_CNTL); 3697 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; 3698 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); 3699 WREG32(mmCP_RB_WPTR_POLL_CNTL, data); 3700 3701 data = 0; 3702 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); 3703 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); 3704 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); 3705 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); 3706 WREG32(mmRLC_PG_DELAY, data); 3707 3708 data = RREG32(mmRLC_PG_DELAY_2); 3709 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; 3710 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); 3711 WREG32(mmRLC_PG_DELAY_2, data); 3712 3713 data = RREG32(mmRLC_AUTO_PG_CTRL); 3714 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; 3715 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); 3716 WREG32(mmRLC_AUTO_PG_CTRL, data); 3717 } 3718 } 3719 3720 static void gfx_v8_0_init_pg(struct amdgpu_device *adev) 3721 { 3722 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | 3723 AMD_PG_SUPPORT_GFX_SMG | 3724 AMD_PG_SUPPORT_GFX_DMG | 3725 AMD_PG_SUPPORT_CP | 3726 AMD_PG_SUPPORT_GDS | 3727 AMD_PG_SUPPORT_RLC_SMU_HS)) { 3728 gfx_v8_0_init_csb(adev); 3729 gfx_v8_0_init_save_restore_list(adev); 3730 gfx_v8_0_enable_save_restore_machine(adev); 3731 3732 if (adev->asic_type == CHIP_POLARIS11) 3733 polaris11_init_power_gating(adev); 3734 } 3735 } 3736 3737 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) 3738 { 3739 u32 tmp = RREG32(mmRLC_CNTL); 3740 3741 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0); 3742 WREG32(mmRLC_CNTL, tmp); 3743 3744 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 3745 3746 gfx_v8_0_wait_for_rlc_serdes(adev); 3747 } 3748 3749 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev) 3750 { 3751 u32 tmp = RREG32(mmGRBM_SOFT_RESET); 3752 3753 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 3754 WREG32(mmGRBM_SOFT_RESET, tmp); 3755 udelay(50); 3756 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 3757 WREG32(mmGRBM_SOFT_RESET, tmp); 3758 udelay(50); 3759 } 3760 3761 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) 3762 { 3763 u32 tmp = RREG32(mmRLC_CNTL); 3764 3765 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1); 3766 WREG32(mmRLC_CNTL, tmp); 3767 3768 /* carrizo do enable cp interrupt after cp inited */ 3769 if (!(adev->flags & AMD_IS_APU)) 3770 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 3771 3772 udelay(50); 3773 } 3774 3775 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) 3776 { 3777 const struct rlc_firmware_header_v2_0 *hdr; 3778 const __le32 *fw_data; 3779 unsigned i, fw_size; 3780 3781 if (!adev->gfx.rlc_fw) 3782 return -EINVAL; 3783 3784 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 3785 amdgpu_ucode_print_rlc_hdr(&hdr->header); 3786 3787 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 3788 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 3789 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 3790 3791 WREG32(mmRLC_GPM_UCODE_ADDR, 0); 3792 for (i = 0; i < fw_size; i++) 3793 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 3794 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 3795 3796 return 0; 3797 } 3798 3799 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) 3800 { 3801 int r; 3802 3803 gfx_v8_0_rlc_stop(adev); 3804 3805 /* disable CG */ 3806 WREG32(mmRLC_CGCG_CGLS_CTRL, 0); 3807 if (adev->asic_type == CHIP_POLARIS11 || 3808 adev->asic_type == CHIP_POLARIS10) 3809 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0); 3810 3811 /* disable PG */ 3812 WREG32(mmRLC_PG_CNTL, 0); 3813 3814 gfx_v8_0_rlc_reset(adev); 3815 3816 gfx_v8_0_init_pg(adev); 3817 3818 if (!adev->pp_enabled) { 3819 if (!adev->firmware.smu_load) { 3820 /* legacy rlc firmware loading */ 3821 r = gfx_v8_0_rlc_load_microcode(adev); 3822 if (r) 3823 return r; 3824 } else { 3825 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 3826 AMDGPU_UCODE_ID_RLC_G); 3827 if (r) 3828 return -EINVAL; 3829 } 3830 } 3831 3832 gfx_v8_0_rlc_start(adev); 3833 3834 return 0; 3835 } 3836 3837 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 3838 { 3839 int i; 3840 u32 tmp = RREG32(mmCP_ME_CNTL); 3841 3842 if (enable) { 3843 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 3844 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 3845 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 3846 } else { 3847 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 3848 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 3849 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 3850 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 3851 adev->gfx.gfx_ring[i].ready = false; 3852 } 3853 WREG32(mmCP_ME_CNTL, tmp); 3854 udelay(50); 3855 } 3856 3857 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 3858 { 3859 const struct gfx_firmware_header_v1_0 *pfp_hdr; 3860 const struct gfx_firmware_header_v1_0 *ce_hdr; 3861 const struct gfx_firmware_header_v1_0 *me_hdr; 3862 const __le32 *fw_data; 3863 unsigned i, fw_size; 3864 3865 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 3866 return -EINVAL; 3867 3868 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 3869 adev->gfx.pfp_fw->data; 3870 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 3871 adev->gfx.ce_fw->data; 3872 me_hdr = (const struct gfx_firmware_header_v1_0 *) 3873 adev->gfx.me_fw->data; 3874 3875 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 3876 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 3877 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 3878 3879 gfx_v8_0_cp_gfx_enable(adev, false); 3880 3881 /* PFP */ 3882 fw_data = (const __le32 *) 3883 (adev->gfx.pfp_fw->data + 3884 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 3885 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 3886 WREG32(mmCP_PFP_UCODE_ADDR, 0); 3887 for (i = 0; i < fw_size; i++) 3888 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 3889 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 3890 3891 /* CE */ 3892 fw_data = (const __le32 *) 3893 (adev->gfx.ce_fw->data + 3894 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 3895 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 3896 WREG32(mmCP_CE_UCODE_ADDR, 0); 3897 for (i = 0; i < fw_size; i++) 3898 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 3899 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 3900 3901 /* ME */ 3902 fw_data = (const __le32 *) 3903 (adev->gfx.me_fw->data + 3904 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 3905 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 3906 WREG32(mmCP_ME_RAM_WADDR, 0); 3907 for (i = 0; i < fw_size; i++) 3908 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 3909 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 3910 3911 return 0; 3912 } 3913 3914 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 3915 { 3916 u32 count = 0; 3917 const struct cs_section_def *sect = NULL; 3918 const struct cs_extent_def *ext = NULL; 3919 3920 /* begin clear state */ 3921 count += 2; 3922 /* context control state */ 3923 count += 3; 3924 3925 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 3926 for (ext = sect->section; ext->extent != NULL; ++ext) { 3927 if (sect->id == SECT_CONTEXT) 3928 count += 2 + ext->reg_count; 3929 else 3930 return 0; 3931 } 3932 } 3933 /* pa_sc_raster_config/pa_sc_raster_config1 */ 3934 count += 4; 3935 /* end clear state */ 3936 count += 2; 3937 /* clear state */ 3938 count += 2; 3939 3940 return count; 3941 } 3942 3943 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 3944 { 3945 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 3946 const struct cs_section_def *sect = NULL; 3947 const struct cs_extent_def *ext = NULL; 3948 int r, i; 3949 3950 /* init the CP */ 3951 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 3952 WREG32(mmCP_ENDIAN_SWAP, 0); 3953 WREG32(mmCP_DEVICE_ID, 1); 3954 3955 gfx_v8_0_cp_gfx_enable(adev, true); 3956 3957 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 3958 if (r) { 3959 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 3960 return r; 3961 } 3962 3963 /* clear state buffer */ 3964 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 3965 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 3966 3967 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 3968 amdgpu_ring_write(ring, 0x80000000); 3969 amdgpu_ring_write(ring, 0x80000000); 3970 3971 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 3972 for (ext = sect->section; ext->extent != NULL; ++ext) { 3973 if (sect->id == SECT_CONTEXT) { 3974 amdgpu_ring_write(ring, 3975 PACKET3(PACKET3_SET_CONTEXT_REG, 3976 ext->reg_count)); 3977 amdgpu_ring_write(ring, 3978 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 3979 for (i = 0; i < ext->reg_count; i++) 3980 amdgpu_ring_write(ring, ext->extent[i]); 3981 } 3982 } 3983 } 3984 3985 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 3986 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 3987 switch (adev->asic_type) { 3988 case CHIP_TONGA: 3989 case CHIP_POLARIS10: 3990 amdgpu_ring_write(ring, 0x16000012); 3991 amdgpu_ring_write(ring, 0x0000002A); 3992 break; 3993 case CHIP_POLARIS11: 3994 amdgpu_ring_write(ring, 0x16000012); 3995 amdgpu_ring_write(ring, 0x00000000); 3996 break; 3997 case CHIP_FIJI: 3998 amdgpu_ring_write(ring, 0x3a00161a); 3999 amdgpu_ring_write(ring, 0x0000002e); 4000 break; 4001 case CHIP_CARRIZO: 4002 amdgpu_ring_write(ring, 0x00000002); 4003 amdgpu_ring_write(ring, 0x00000000); 4004 break; 4005 case CHIP_TOPAZ: 4006 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ? 4007 0x00000000 : 0x00000002); 4008 amdgpu_ring_write(ring, 0x00000000); 4009 break; 4010 case CHIP_STONEY: 4011 amdgpu_ring_write(ring, 0x00000000); 4012 amdgpu_ring_write(ring, 0x00000000); 4013 break; 4014 default: 4015 BUG(); 4016 } 4017 4018 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4019 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4020 4021 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4022 amdgpu_ring_write(ring, 0); 4023 4024 /* init the CE partitions */ 4025 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4026 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4027 amdgpu_ring_write(ring, 0x8000); 4028 amdgpu_ring_write(ring, 0x8000); 4029 4030 amdgpu_ring_commit(ring); 4031 4032 return 0; 4033 } 4034 4035 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) 4036 { 4037 struct amdgpu_ring *ring; 4038 u32 tmp; 4039 u32 rb_bufsz; 4040 u64 rb_addr, rptr_addr; 4041 int r; 4042 4043 /* Set the write pointer delay */ 4044 WREG32(mmCP_RB_WPTR_DELAY, 0); 4045 4046 /* set the RB to use vmid 0 */ 4047 WREG32(mmCP_RB_VMID, 0); 4048 4049 /* Set ring buffer size */ 4050 ring = &adev->gfx.gfx_ring[0]; 4051 rb_bufsz = order_base_2(ring->ring_size / 8); 4052 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 4053 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 4054 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3); 4055 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1); 4056 #ifdef __BIG_ENDIAN 4057 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 4058 #endif 4059 WREG32(mmCP_RB0_CNTL, tmp); 4060 4061 /* Initialize the ring buffer's read and write pointers */ 4062 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); 4063 ring->wptr = 0; 4064 WREG32(mmCP_RB0_WPTR, ring->wptr); 4065 4066 /* set the wb address wether it's enabled or not */ 4067 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4068 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 4069 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); 4070 4071 mdelay(1); 4072 WREG32(mmCP_RB0_CNTL, tmp); 4073 4074 rb_addr = ring->gpu_addr >> 8; 4075 WREG32(mmCP_RB0_BASE, rb_addr); 4076 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4077 4078 /* no gfx doorbells on iceland */ 4079 if (adev->asic_type != CHIP_TOPAZ) { 4080 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); 4081 if (ring->use_doorbell) { 4082 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4083 DOORBELL_OFFSET, ring->doorbell_index); 4084 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4085 DOORBELL_HIT, 0); 4086 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4087 DOORBELL_EN, 1); 4088 } else { 4089 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4090 DOORBELL_EN, 0); 4091 } 4092 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); 4093 4094 if (adev->asic_type == CHIP_TONGA) { 4095 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 4096 DOORBELL_RANGE_LOWER, 4097 AMDGPU_DOORBELL_GFX_RING0); 4098 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 4099 4100 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, 4101 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 4102 } 4103 4104 } 4105 4106 /* start the ring */ 4107 gfx_v8_0_cp_gfx_start(adev); 4108 ring->ready = true; 4109 r = amdgpu_ring_test_ring(ring); 4110 if (r) { 4111 ring->ready = false; 4112 return r; 4113 } 4114 4115 return 0; 4116 } 4117 4118 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4119 { 4120 int i; 4121 4122 if (enable) { 4123 WREG32(mmCP_MEC_CNTL, 0); 4124 } else { 4125 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4126 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4127 adev->gfx.compute_ring[i].ready = false; 4128 } 4129 udelay(50); 4130 } 4131 4132 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4133 { 4134 const struct gfx_firmware_header_v1_0 *mec_hdr; 4135 const __le32 *fw_data; 4136 unsigned i, fw_size; 4137 4138 if (!adev->gfx.mec_fw) 4139 return -EINVAL; 4140 4141 gfx_v8_0_cp_compute_enable(adev, false); 4142 4143 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4144 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4145 4146 fw_data = (const __le32 *) 4147 (adev->gfx.mec_fw->data + 4148 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4149 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4150 4151 /* MEC1 */ 4152 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4153 for (i = 0; i < fw_size; i++) 4154 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4155 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4156 4157 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4158 if (adev->gfx.mec2_fw) { 4159 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4160 4161 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4162 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4163 4164 fw_data = (const __le32 *) 4165 (adev->gfx.mec2_fw->data + 4166 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4167 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4168 4169 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4170 for (i = 0; i < fw_size; i++) 4171 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4172 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4173 } 4174 4175 return 0; 4176 } 4177 4178 struct vi_mqd { 4179 uint32_t header; /* ordinal0 */ 4180 uint32_t compute_dispatch_initiator; /* ordinal1 */ 4181 uint32_t compute_dim_x; /* ordinal2 */ 4182 uint32_t compute_dim_y; /* ordinal3 */ 4183 uint32_t compute_dim_z; /* ordinal4 */ 4184 uint32_t compute_start_x; /* ordinal5 */ 4185 uint32_t compute_start_y; /* ordinal6 */ 4186 uint32_t compute_start_z; /* ordinal7 */ 4187 uint32_t compute_num_thread_x; /* ordinal8 */ 4188 uint32_t compute_num_thread_y; /* ordinal9 */ 4189 uint32_t compute_num_thread_z; /* ordinal10 */ 4190 uint32_t compute_pipelinestat_enable; /* ordinal11 */ 4191 uint32_t compute_perfcount_enable; /* ordinal12 */ 4192 uint32_t compute_pgm_lo; /* ordinal13 */ 4193 uint32_t compute_pgm_hi; /* ordinal14 */ 4194 uint32_t compute_tba_lo; /* ordinal15 */ 4195 uint32_t compute_tba_hi; /* ordinal16 */ 4196 uint32_t compute_tma_lo; /* ordinal17 */ 4197 uint32_t compute_tma_hi; /* ordinal18 */ 4198 uint32_t compute_pgm_rsrc1; /* ordinal19 */ 4199 uint32_t compute_pgm_rsrc2; /* ordinal20 */ 4200 uint32_t compute_vmid; /* ordinal21 */ 4201 uint32_t compute_resource_limits; /* ordinal22 */ 4202 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */ 4203 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */ 4204 uint32_t compute_tmpring_size; /* ordinal25 */ 4205 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */ 4206 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */ 4207 uint32_t compute_restart_x; /* ordinal28 */ 4208 uint32_t compute_restart_y; /* ordinal29 */ 4209 uint32_t compute_restart_z; /* ordinal30 */ 4210 uint32_t compute_thread_trace_enable; /* ordinal31 */ 4211 uint32_t compute_misc_reserved; /* ordinal32 */ 4212 uint32_t compute_dispatch_id; /* ordinal33 */ 4213 uint32_t compute_threadgroup_id; /* ordinal34 */ 4214 uint32_t compute_relaunch; /* ordinal35 */ 4215 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */ 4216 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */ 4217 uint32_t compute_wave_restore_control; /* ordinal38 */ 4218 uint32_t reserved9; /* ordinal39 */ 4219 uint32_t reserved10; /* ordinal40 */ 4220 uint32_t reserved11; /* ordinal41 */ 4221 uint32_t reserved12; /* ordinal42 */ 4222 uint32_t reserved13; /* ordinal43 */ 4223 uint32_t reserved14; /* ordinal44 */ 4224 uint32_t reserved15; /* ordinal45 */ 4225 uint32_t reserved16; /* ordinal46 */ 4226 uint32_t reserved17; /* ordinal47 */ 4227 uint32_t reserved18; /* ordinal48 */ 4228 uint32_t reserved19; /* ordinal49 */ 4229 uint32_t reserved20; /* ordinal50 */ 4230 uint32_t reserved21; /* ordinal51 */ 4231 uint32_t reserved22; /* ordinal52 */ 4232 uint32_t reserved23; /* ordinal53 */ 4233 uint32_t reserved24; /* ordinal54 */ 4234 uint32_t reserved25; /* ordinal55 */ 4235 uint32_t reserved26; /* ordinal56 */ 4236 uint32_t reserved27; /* ordinal57 */ 4237 uint32_t reserved28; /* ordinal58 */ 4238 uint32_t reserved29; /* ordinal59 */ 4239 uint32_t reserved30; /* ordinal60 */ 4240 uint32_t reserved31; /* ordinal61 */ 4241 uint32_t reserved32; /* ordinal62 */ 4242 uint32_t reserved33; /* ordinal63 */ 4243 uint32_t reserved34; /* ordinal64 */ 4244 uint32_t compute_user_data_0; /* ordinal65 */ 4245 uint32_t compute_user_data_1; /* ordinal66 */ 4246 uint32_t compute_user_data_2; /* ordinal67 */ 4247 uint32_t compute_user_data_3; /* ordinal68 */ 4248 uint32_t compute_user_data_4; /* ordinal69 */ 4249 uint32_t compute_user_data_5; /* ordinal70 */ 4250 uint32_t compute_user_data_6; /* ordinal71 */ 4251 uint32_t compute_user_data_7; /* ordinal72 */ 4252 uint32_t compute_user_data_8; /* ordinal73 */ 4253 uint32_t compute_user_data_9; /* ordinal74 */ 4254 uint32_t compute_user_data_10; /* ordinal75 */ 4255 uint32_t compute_user_data_11; /* ordinal76 */ 4256 uint32_t compute_user_data_12; /* ordinal77 */ 4257 uint32_t compute_user_data_13; /* ordinal78 */ 4258 uint32_t compute_user_data_14; /* ordinal79 */ 4259 uint32_t compute_user_data_15; /* ordinal80 */ 4260 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */ 4261 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */ 4262 uint32_t reserved35; /* ordinal83 */ 4263 uint32_t reserved36; /* ordinal84 */ 4264 uint32_t reserved37; /* ordinal85 */ 4265 uint32_t cp_mqd_query_time_lo; /* ordinal86 */ 4266 uint32_t cp_mqd_query_time_hi; /* ordinal87 */ 4267 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */ 4268 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */ 4269 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */ 4270 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */ 4271 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */ 4272 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */ 4273 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */ 4274 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */ 4275 uint32_t reserved38; /* ordinal96 */ 4276 uint32_t reserved39; /* ordinal97 */ 4277 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */ 4278 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */ 4279 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */ 4280 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */ 4281 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */ 4282 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */ 4283 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */ 4284 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */ 4285 uint32_t reserved40; /* ordinal106 */ 4286 uint32_t reserved41; /* ordinal107 */ 4287 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */ 4288 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */ 4289 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */ 4290 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */ 4291 uint32_t reserved42; /* ordinal112 */ 4292 uint32_t reserved43; /* ordinal113 */ 4293 uint32_t cp_pq_exe_status_lo; /* ordinal114 */ 4294 uint32_t cp_pq_exe_status_hi; /* ordinal115 */ 4295 uint32_t cp_packet_id_lo; /* ordinal116 */ 4296 uint32_t cp_packet_id_hi; /* ordinal117 */ 4297 uint32_t cp_packet_exe_status_lo; /* ordinal118 */ 4298 uint32_t cp_packet_exe_status_hi; /* ordinal119 */ 4299 uint32_t gds_save_base_addr_lo; /* ordinal120 */ 4300 uint32_t gds_save_base_addr_hi; /* ordinal121 */ 4301 uint32_t gds_save_mask_lo; /* ordinal122 */ 4302 uint32_t gds_save_mask_hi; /* ordinal123 */ 4303 uint32_t ctx_save_base_addr_lo; /* ordinal124 */ 4304 uint32_t ctx_save_base_addr_hi; /* ordinal125 */ 4305 uint32_t reserved44; /* ordinal126 */ 4306 uint32_t reserved45; /* ordinal127 */ 4307 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */ 4308 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */ 4309 uint32_t cp_hqd_active; /* ordinal130 */ 4310 uint32_t cp_hqd_vmid; /* ordinal131 */ 4311 uint32_t cp_hqd_persistent_state; /* ordinal132 */ 4312 uint32_t cp_hqd_pipe_priority; /* ordinal133 */ 4313 uint32_t cp_hqd_queue_priority; /* ordinal134 */ 4314 uint32_t cp_hqd_quantum; /* ordinal135 */ 4315 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */ 4316 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */ 4317 uint32_t cp_hqd_pq_rptr; /* ordinal138 */ 4318 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */ 4319 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */ 4320 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */ 4321 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */ 4322 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */ 4323 uint32_t cp_hqd_pq_wptr; /* ordinal144 */ 4324 uint32_t cp_hqd_pq_control; /* ordinal145 */ 4325 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */ 4326 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */ 4327 uint32_t cp_hqd_ib_rptr; /* ordinal148 */ 4328 uint32_t cp_hqd_ib_control; /* ordinal149 */ 4329 uint32_t cp_hqd_iq_timer; /* ordinal150 */ 4330 uint32_t cp_hqd_iq_rptr; /* ordinal151 */ 4331 uint32_t cp_hqd_dequeue_request; /* ordinal152 */ 4332 uint32_t cp_hqd_dma_offload; /* ordinal153 */ 4333 uint32_t cp_hqd_sema_cmd; /* ordinal154 */ 4334 uint32_t cp_hqd_msg_type; /* ordinal155 */ 4335 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */ 4336 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */ 4337 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */ 4338 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */ 4339 uint32_t cp_hqd_hq_status0; /* ordinal160 */ 4340 uint32_t cp_hqd_hq_control0; /* ordinal161 */ 4341 uint32_t cp_mqd_control; /* ordinal162 */ 4342 uint32_t cp_hqd_hq_status1; /* ordinal163 */ 4343 uint32_t cp_hqd_hq_control1; /* ordinal164 */ 4344 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */ 4345 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */ 4346 uint32_t cp_hqd_eop_control; /* ordinal167 */ 4347 uint32_t cp_hqd_eop_rptr; /* ordinal168 */ 4348 uint32_t cp_hqd_eop_wptr; /* ordinal169 */ 4349 uint32_t cp_hqd_eop_done_events; /* ordinal170 */ 4350 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */ 4351 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */ 4352 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */ 4353 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */ 4354 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */ 4355 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */ 4356 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */ 4357 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */ 4358 uint32_t cp_hqd_error; /* ordinal179 */ 4359 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */ 4360 uint32_t cp_hqd_eop_dones; /* ordinal181 */ 4361 uint32_t reserved46; /* ordinal182 */ 4362 uint32_t reserved47; /* ordinal183 */ 4363 uint32_t reserved48; /* ordinal184 */ 4364 uint32_t reserved49; /* ordinal185 */ 4365 uint32_t reserved50; /* ordinal186 */ 4366 uint32_t reserved51; /* ordinal187 */ 4367 uint32_t reserved52; /* ordinal188 */ 4368 uint32_t reserved53; /* ordinal189 */ 4369 uint32_t reserved54; /* ordinal190 */ 4370 uint32_t reserved55; /* ordinal191 */ 4371 uint32_t iqtimer_pkt_header; /* ordinal192 */ 4372 uint32_t iqtimer_pkt_dw0; /* ordinal193 */ 4373 uint32_t iqtimer_pkt_dw1; /* ordinal194 */ 4374 uint32_t iqtimer_pkt_dw2; /* ordinal195 */ 4375 uint32_t iqtimer_pkt_dw3; /* ordinal196 */ 4376 uint32_t iqtimer_pkt_dw4; /* ordinal197 */ 4377 uint32_t iqtimer_pkt_dw5; /* ordinal198 */ 4378 uint32_t iqtimer_pkt_dw6; /* ordinal199 */ 4379 uint32_t iqtimer_pkt_dw7; /* ordinal200 */ 4380 uint32_t iqtimer_pkt_dw8; /* ordinal201 */ 4381 uint32_t iqtimer_pkt_dw9; /* ordinal202 */ 4382 uint32_t iqtimer_pkt_dw10; /* ordinal203 */ 4383 uint32_t iqtimer_pkt_dw11; /* ordinal204 */ 4384 uint32_t iqtimer_pkt_dw12; /* ordinal205 */ 4385 uint32_t iqtimer_pkt_dw13; /* ordinal206 */ 4386 uint32_t iqtimer_pkt_dw14; /* ordinal207 */ 4387 uint32_t iqtimer_pkt_dw15; /* ordinal208 */ 4388 uint32_t iqtimer_pkt_dw16; /* ordinal209 */ 4389 uint32_t iqtimer_pkt_dw17; /* ordinal210 */ 4390 uint32_t iqtimer_pkt_dw18; /* ordinal211 */ 4391 uint32_t iqtimer_pkt_dw19; /* ordinal212 */ 4392 uint32_t iqtimer_pkt_dw20; /* ordinal213 */ 4393 uint32_t iqtimer_pkt_dw21; /* ordinal214 */ 4394 uint32_t iqtimer_pkt_dw22; /* ordinal215 */ 4395 uint32_t iqtimer_pkt_dw23; /* ordinal216 */ 4396 uint32_t iqtimer_pkt_dw24; /* ordinal217 */ 4397 uint32_t iqtimer_pkt_dw25; /* ordinal218 */ 4398 uint32_t iqtimer_pkt_dw26; /* ordinal219 */ 4399 uint32_t iqtimer_pkt_dw27; /* ordinal220 */ 4400 uint32_t iqtimer_pkt_dw28; /* ordinal221 */ 4401 uint32_t iqtimer_pkt_dw29; /* ordinal222 */ 4402 uint32_t iqtimer_pkt_dw30; /* ordinal223 */ 4403 uint32_t iqtimer_pkt_dw31; /* ordinal224 */ 4404 uint32_t reserved56; /* ordinal225 */ 4405 uint32_t reserved57; /* ordinal226 */ 4406 uint32_t reserved58; /* ordinal227 */ 4407 uint32_t set_resources_header; /* ordinal228 */ 4408 uint32_t set_resources_dw1; /* ordinal229 */ 4409 uint32_t set_resources_dw2; /* ordinal230 */ 4410 uint32_t set_resources_dw3; /* ordinal231 */ 4411 uint32_t set_resources_dw4; /* ordinal232 */ 4412 uint32_t set_resources_dw5; /* ordinal233 */ 4413 uint32_t set_resources_dw6; /* ordinal234 */ 4414 uint32_t set_resources_dw7; /* ordinal235 */ 4415 uint32_t reserved59; /* ordinal236 */ 4416 uint32_t reserved60; /* ordinal237 */ 4417 uint32_t reserved61; /* ordinal238 */ 4418 uint32_t reserved62; /* ordinal239 */ 4419 uint32_t reserved63; /* ordinal240 */ 4420 uint32_t reserved64; /* ordinal241 */ 4421 uint32_t reserved65; /* ordinal242 */ 4422 uint32_t reserved66; /* ordinal243 */ 4423 uint32_t reserved67; /* ordinal244 */ 4424 uint32_t reserved68; /* ordinal245 */ 4425 uint32_t reserved69; /* ordinal246 */ 4426 uint32_t reserved70; /* ordinal247 */ 4427 uint32_t reserved71; /* ordinal248 */ 4428 uint32_t reserved72; /* ordinal249 */ 4429 uint32_t reserved73; /* ordinal250 */ 4430 uint32_t reserved74; /* ordinal251 */ 4431 uint32_t reserved75; /* ordinal252 */ 4432 uint32_t reserved76; /* ordinal253 */ 4433 uint32_t reserved77; /* ordinal254 */ 4434 uint32_t reserved78; /* ordinal255 */ 4435 4436 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */ 4437 }; 4438 4439 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) 4440 { 4441 int i, r; 4442 4443 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4444 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4445 4446 if (ring->mqd_obj) { 4447 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4448 if (unlikely(r != 0)) 4449 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); 4450 4451 amdgpu_bo_unpin(ring->mqd_obj); 4452 amdgpu_bo_unreserve(ring->mqd_obj); 4453 4454 amdgpu_bo_unref(&ring->mqd_obj); 4455 ring->mqd_obj = NULL; 4456 } 4457 } 4458 } 4459 4460 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) 4461 { 4462 int r, i, j; 4463 u32 tmp; 4464 bool use_doorbell = true; 4465 u64 hqd_gpu_addr; 4466 u64 mqd_gpu_addr; 4467 u64 eop_gpu_addr; 4468 u64 wb_gpu_addr; 4469 u32 *buf; 4470 struct vi_mqd *mqd; 4471 4472 /* init the pipes */ 4473 mutex_lock(&adev->srbm_mutex); 4474 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { 4475 int me = (i < 4) ? 1 : 2; 4476 int pipe = (i < 4) ? i : (i - 4); 4477 4478 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); 4479 eop_gpu_addr >>= 8; 4480 4481 vi_srbm_select(adev, me, pipe, 0, 0); 4482 4483 /* write the EOP addr */ 4484 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); 4485 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); 4486 4487 /* set the VMID assigned */ 4488 WREG32(mmCP_HQD_VMID, 0); 4489 4490 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4491 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4492 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4493 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 4494 WREG32(mmCP_HQD_EOP_CONTROL, tmp); 4495 } 4496 vi_srbm_select(adev, 0, 0, 0, 0); 4497 mutex_unlock(&adev->srbm_mutex); 4498 4499 /* init the queues. Just two for now. */ 4500 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4501 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4502 4503 if (ring->mqd_obj == NULL) { 4504 r = amdgpu_bo_create(adev, 4505 sizeof(struct vi_mqd), 4506 PAGE_SIZE, true, 4507 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, 4508 NULL, &ring->mqd_obj); 4509 if (r) { 4510 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); 4511 return r; 4512 } 4513 } 4514 4515 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4516 if (unlikely(r != 0)) { 4517 gfx_v8_0_cp_compute_fini(adev); 4518 return r; 4519 } 4520 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, 4521 &mqd_gpu_addr); 4522 if (r) { 4523 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); 4524 gfx_v8_0_cp_compute_fini(adev); 4525 return r; 4526 } 4527 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); 4528 if (r) { 4529 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); 4530 gfx_v8_0_cp_compute_fini(adev); 4531 return r; 4532 } 4533 4534 /* init the mqd struct */ 4535 memset(buf, 0, sizeof(struct vi_mqd)); 4536 4537 mqd = (struct vi_mqd *)buf; 4538 mqd->header = 0xC0310800; 4539 mqd->compute_pipelinestat_enable = 0x00000001; 4540 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4541 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4542 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4543 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4544 mqd->compute_misc_reserved = 0x00000003; 4545 4546 mutex_lock(&adev->srbm_mutex); 4547 vi_srbm_select(adev, ring->me, 4548 ring->pipe, 4549 ring->queue, 0); 4550 4551 /* disable wptr polling */ 4552 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); 4553 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 4554 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); 4555 4556 mqd->cp_hqd_eop_base_addr_lo = 4557 RREG32(mmCP_HQD_EOP_BASE_ADDR); 4558 mqd->cp_hqd_eop_base_addr_hi = 4559 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); 4560 4561 /* enable doorbell? */ 4562 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4563 if (use_doorbell) { 4564 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 4565 } else { 4566 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); 4567 } 4568 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); 4569 mqd->cp_hqd_pq_doorbell_control = tmp; 4570 4571 /* disable the queue if it's active */ 4572 mqd->cp_hqd_dequeue_request = 0; 4573 mqd->cp_hqd_pq_rptr = 0; 4574 mqd->cp_hqd_pq_wptr= 0; 4575 if (RREG32(mmCP_HQD_ACTIVE) & 1) { 4576 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); 4577 for (j = 0; j < adev->usec_timeout; j++) { 4578 if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) 4579 break; 4580 udelay(1); 4581 } 4582 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); 4583 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); 4584 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4585 } 4586 4587 /* set the pointer to the MQD */ 4588 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; 4589 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 4590 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 4591 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 4592 4593 /* set MQD vmid to 0 */ 4594 tmp = RREG32(mmCP_MQD_CONTROL); 4595 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4596 WREG32(mmCP_MQD_CONTROL, tmp); 4597 mqd->cp_mqd_control = tmp; 4598 4599 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4600 hqd_gpu_addr = ring->gpu_addr >> 8; 4601 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4602 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4603 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 4604 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 4605 4606 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4607 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4608 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4609 (order_base_2(ring->ring_size / 4) - 1)); 4610 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4611 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4612 #ifdef __BIG_ENDIAN 4613 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4614 #endif 4615 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4616 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4617 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4618 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4619 WREG32(mmCP_HQD_PQ_CONTROL, tmp); 4620 mqd->cp_hqd_pq_control = tmp; 4621 4622 /* set the wb address wether it's enabled or not */ 4623 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4624 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4625 mqd->cp_hqd_pq_rptr_report_addr_hi = 4626 upper_32_bits(wb_gpu_addr) & 0xffff; 4627 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 4628 mqd->cp_hqd_pq_rptr_report_addr_lo); 4629 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 4630 mqd->cp_hqd_pq_rptr_report_addr_hi); 4631 4632 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4633 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4634 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; 4635 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4636 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr); 4637 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 4638 mqd->cp_hqd_pq_wptr_poll_addr_hi); 4639 4640 /* enable the doorbell if requested */ 4641 if (use_doorbell) { 4642 if ((adev->asic_type == CHIP_CARRIZO) || 4643 (adev->asic_type == CHIP_FIJI) || 4644 (adev->asic_type == CHIP_STONEY) || 4645 (adev->asic_type == CHIP_POLARIS11) || 4646 (adev->asic_type == CHIP_POLARIS10)) { 4647 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 4648 AMDGPU_DOORBELL_KIQ << 2); 4649 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 4650 AMDGPU_DOORBELL_MEC_RING7 << 2); 4651 } 4652 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4653 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4654 DOORBELL_OFFSET, ring->doorbell_index); 4655 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 4656 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); 4657 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); 4658 mqd->cp_hqd_pq_doorbell_control = tmp; 4659 4660 } else { 4661 mqd->cp_hqd_pq_doorbell_control = 0; 4662 } 4663 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 4664 mqd->cp_hqd_pq_doorbell_control); 4665 4666 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4667 ring->wptr = 0; 4668 mqd->cp_hqd_pq_wptr = ring->wptr; 4669 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 4670 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4671 4672 /* set the vmid for the queue */ 4673 mqd->cp_hqd_vmid = 0; 4674 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); 4675 4676 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4677 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4678 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); 4679 mqd->cp_hqd_persistent_state = tmp; 4680 if (adev->asic_type == CHIP_STONEY || 4681 adev->asic_type == CHIP_POLARIS11 || 4682 adev->asic_type == CHIP_POLARIS10) { 4683 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); 4684 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); 4685 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); 4686 } 4687 4688 /* activate the queue */ 4689 mqd->cp_hqd_active = 1; 4690 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 4691 4692 vi_srbm_select(adev, 0, 0, 0, 0); 4693 mutex_unlock(&adev->srbm_mutex); 4694 4695 amdgpu_bo_kunmap(ring->mqd_obj); 4696 amdgpu_bo_unreserve(ring->mqd_obj); 4697 } 4698 4699 if (use_doorbell) { 4700 tmp = RREG32(mmCP_PQ_STATUS); 4701 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4702 WREG32(mmCP_PQ_STATUS, tmp); 4703 } 4704 4705 gfx_v8_0_cp_compute_enable(adev, true); 4706 4707 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4708 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4709 4710 ring->ready = true; 4711 r = amdgpu_ring_test_ring(ring); 4712 if (r) 4713 ring->ready = false; 4714 } 4715 4716 return 0; 4717 } 4718 4719 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4720 { 4721 int r; 4722 4723 if (!(adev->flags & AMD_IS_APU)) 4724 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4725 4726 if (!adev->pp_enabled) { 4727 if (!adev->firmware.smu_load) { 4728 /* legacy firmware loading */ 4729 r = gfx_v8_0_cp_gfx_load_microcode(adev); 4730 if (r) 4731 return r; 4732 4733 r = gfx_v8_0_cp_compute_load_microcode(adev); 4734 if (r) 4735 return r; 4736 } else { 4737 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4738 AMDGPU_UCODE_ID_CP_CE); 4739 if (r) 4740 return -EINVAL; 4741 4742 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4743 AMDGPU_UCODE_ID_CP_PFP); 4744 if (r) 4745 return -EINVAL; 4746 4747 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4748 AMDGPU_UCODE_ID_CP_ME); 4749 if (r) 4750 return -EINVAL; 4751 4752 if (adev->asic_type == CHIP_TOPAZ) { 4753 r = gfx_v8_0_cp_compute_load_microcode(adev); 4754 if (r) 4755 return r; 4756 } else { 4757 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 4758 AMDGPU_UCODE_ID_CP_MEC1); 4759 if (r) 4760 return -EINVAL; 4761 } 4762 } 4763 } 4764 4765 r = gfx_v8_0_cp_gfx_resume(adev); 4766 if (r) 4767 return r; 4768 4769 r = gfx_v8_0_cp_compute_resume(adev); 4770 if (r) 4771 return r; 4772 4773 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4774 4775 return 0; 4776 } 4777 4778 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 4779 { 4780 gfx_v8_0_cp_gfx_enable(adev, enable); 4781 gfx_v8_0_cp_compute_enable(adev, enable); 4782 } 4783 4784 static int gfx_v8_0_hw_init(void *handle) 4785 { 4786 int r; 4787 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4788 4789 gfx_v8_0_init_golden_registers(adev); 4790 4791 gfx_v8_0_gpu_init(adev); 4792 4793 r = gfx_v8_0_rlc_resume(adev); 4794 if (r) 4795 return r; 4796 4797 r = gfx_v8_0_cp_resume(adev); 4798 if (r) 4799 return r; 4800 4801 return r; 4802 } 4803 4804 static int gfx_v8_0_hw_fini(void *handle) 4805 { 4806 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4807 4808 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 4809 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 4810 gfx_v8_0_cp_enable(adev, false); 4811 gfx_v8_0_rlc_stop(adev); 4812 gfx_v8_0_cp_compute_fini(adev); 4813 4814 amdgpu_set_powergating_state(adev, 4815 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 4816 4817 return 0; 4818 } 4819 4820 static int gfx_v8_0_suspend(void *handle) 4821 { 4822 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4823 4824 return gfx_v8_0_hw_fini(adev); 4825 } 4826 4827 static int gfx_v8_0_resume(void *handle) 4828 { 4829 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4830 4831 return gfx_v8_0_hw_init(adev); 4832 } 4833 4834 static bool gfx_v8_0_is_idle(void *handle) 4835 { 4836 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4837 4838 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 4839 return false; 4840 else 4841 return true; 4842 } 4843 4844 static int gfx_v8_0_wait_for_idle(void *handle) 4845 { 4846 unsigned i; 4847 u32 tmp; 4848 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4849 4850 for (i = 0; i < adev->usec_timeout; i++) { 4851 /* read MC_STATUS */ 4852 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK; 4853 4854 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE)) 4855 return 0; 4856 udelay(1); 4857 } 4858 return -ETIMEDOUT; 4859 } 4860 4861 static int gfx_v8_0_soft_reset(void *handle) 4862 { 4863 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 4864 u32 tmp; 4865 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 4866 4867 /* GRBM_STATUS */ 4868 tmp = RREG32(mmGRBM_STATUS); 4869 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4870 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4871 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4872 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4873 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4874 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) { 4875 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4876 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4877 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4878 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4879 } 4880 4881 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4882 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4883 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4884 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 4885 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 4886 } 4887 4888 /* GRBM_STATUS2 */ 4889 tmp = RREG32(mmGRBM_STATUS2); 4890 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4891 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4892 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4893 4894 /* SRBM_STATUS */ 4895 tmp = RREG32(mmSRBM_STATUS); 4896 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 4897 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 4898 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 4899 4900 if (grbm_soft_reset || srbm_soft_reset) { 4901 /* stop the rlc */ 4902 gfx_v8_0_rlc_stop(adev); 4903 4904 /* Disable GFX parsing/prefetching */ 4905 gfx_v8_0_cp_gfx_enable(adev, false); 4906 4907 /* Disable MEC parsing/prefetching */ 4908 gfx_v8_0_cp_compute_enable(adev, false); 4909 4910 if (grbm_soft_reset || srbm_soft_reset) { 4911 tmp = RREG32(mmGMCON_DEBUG); 4912 tmp = REG_SET_FIELD(tmp, 4913 GMCON_DEBUG, GFX_STALL, 1); 4914 tmp = REG_SET_FIELD(tmp, 4915 GMCON_DEBUG, GFX_CLEAR, 1); 4916 WREG32(mmGMCON_DEBUG, tmp); 4917 4918 udelay(50); 4919 } 4920 4921 if (grbm_soft_reset) { 4922 tmp = RREG32(mmGRBM_SOFT_RESET); 4923 tmp |= grbm_soft_reset; 4924 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 4925 WREG32(mmGRBM_SOFT_RESET, tmp); 4926 tmp = RREG32(mmGRBM_SOFT_RESET); 4927 4928 udelay(50); 4929 4930 tmp &= ~grbm_soft_reset; 4931 WREG32(mmGRBM_SOFT_RESET, tmp); 4932 tmp = RREG32(mmGRBM_SOFT_RESET); 4933 } 4934 4935 if (srbm_soft_reset) { 4936 tmp = RREG32(mmSRBM_SOFT_RESET); 4937 tmp |= srbm_soft_reset; 4938 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 4939 WREG32(mmSRBM_SOFT_RESET, tmp); 4940 tmp = RREG32(mmSRBM_SOFT_RESET); 4941 4942 udelay(50); 4943 4944 tmp &= ~srbm_soft_reset; 4945 WREG32(mmSRBM_SOFT_RESET, tmp); 4946 tmp = RREG32(mmSRBM_SOFT_RESET); 4947 } 4948 4949 if (grbm_soft_reset || srbm_soft_reset) { 4950 tmp = RREG32(mmGMCON_DEBUG); 4951 tmp = REG_SET_FIELD(tmp, 4952 GMCON_DEBUG, GFX_STALL, 0); 4953 tmp = REG_SET_FIELD(tmp, 4954 GMCON_DEBUG, GFX_CLEAR, 0); 4955 WREG32(mmGMCON_DEBUG, tmp); 4956 } 4957 4958 /* Wait a little for things to settle down */ 4959 udelay(50); 4960 } 4961 return 0; 4962 } 4963 4964 /** 4965 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 4966 * 4967 * @adev: amdgpu_device pointer 4968 * 4969 * Fetches a GPU clock counter snapshot. 4970 * Returns the 64 bit clock counter snapshot. 4971 */ 4972 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 4973 { 4974 uint64_t clock; 4975 4976 mutex_lock(&adev->gfx.gpu_clock_mutex); 4977 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 4978 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 4979 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 4980 mutex_unlock(&adev->gfx.gpu_clock_mutex); 4981 return clock; 4982 } 4983 4984 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 4985 uint32_t vmid, 4986 uint32_t gds_base, uint32_t gds_size, 4987 uint32_t gws_base, uint32_t gws_size, 4988 uint32_t oa_base, uint32_t oa_size) 4989 { 4990 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 4991 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 4992 4993 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 4994 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 4995 4996 oa_base = oa_base >> AMDGPU_OA_SHIFT; 4997 oa_size = oa_size >> AMDGPU_OA_SHIFT; 4998 4999 /* GDS Base */ 5000 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5001 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5002 WRITE_DATA_DST_SEL(0))); 5003 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5004 amdgpu_ring_write(ring, 0); 5005 amdgpu_ring_write(ring, gds_base); 5006 5007 /* GDS Size */ 5008 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5009 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5010 WRITE_DATA_DST_SEL(0))); 5011 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5012 amdgpu_ring_write(ring, 0); 5013 amdgpu_ring_write(ring, gds_size); 5014 5015 /* GWS */ 5016 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5017 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5018 WRITE_DATA_DST_SEL(0))); 5019 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5020 amdgpu_ring_write(ring, 0); 5021 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5022 5023 /* OA */ 5024 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5025 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5026 WRITE_DATA_DST_SEL(0))); 5027 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5028 amdgpu_ring_write(ring, 0); 5029 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5030 } 5031 5032 static int gfx_v8_0_early_init(void *handle) 5033 { 5034 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5035 5036 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5037 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; 5038 gfx_v8_0_set_ring_funcs(adev); 5039 gfx_v8_0_set_irq_funcs(adev); 5040 gfx_v8_0_set_gds_init(adev); 5041 gfx_v8_0_set_rlc_funcs(adev); 5042 5043 return 0; 5044 } 5045 5046 static int gfx_v8_0_late_init(void *handle) 5047 { 5048 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5049 int r; 5050 5051 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5052 if (r) 5053 return r; 5054 5055 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5056 if (r) 5057 return r; 5058 5059 /* requires IBs so do in late init after IB pool is initialized */ 5060 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5061 if (r) 5062 return r; 5063 5064 amdgpu_set_powergating_state(adev, 5065 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); 5066 5067 return 0; 5068 } 5069 5070 static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5071 bool enable) 5072 { 5073 uint32_t data, temp; 5074 5075 /* Send msg to SMU via Powerplay */ 5076 amdgpu_set_powergating_state(adev, 5077 AMD_IP_BLOCK_TYPE_SMC, 5078 enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5079 5080 if (enable) { 5081 /* Enable static MGPG */ 5082 temp = data = RREG32(mmRLC_PG_CNTL); 5083 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; 5084 5085 if (temp != data) 5086 WREG32(mmRLC_PG_CNTL, data); 5087 } else { 5088 temp = data = RREG32(mmRLC_PG_CNTL); 5089 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; 5090 5091 if (temp != data) 5092 WREG32(mmRLC_PG_CNTL, data); 5093 } 5094 } 5095 5096 static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5097 bool enable) 5098 { 5099 uint32_t data, temp; 5100 5101 if (enable) { 5102 /* Enable dynamic MGPG */ 5103 temp = data = RREG32(mmRLC_PG_CNTL); 5104 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; 5105 5106 if (temp != data) 5107 WREG32(mmRLC_PG_CNTL, data); 5108 } else { 5109 temp = data = RREG32(mmRLC_PG_CNTL); 5110 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; 5111 5112 if (temp != data) 5113 WREG32(mmRLC_PG_CNTL, data); 5114 } 5115 } 5116 5117 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5118 bool enable) 5119 { 5120 uint32_t data, temp; 5121 5122 if (enable) { 5123 /* Enable quick PG */ 5124 temp = data = RREG32(mmRLC_PG_CNTL); 5125 data |= 0x100000; 5126 5127 if (temp != data) 5128 WREG32(mmRLC_PG_CNTL, data); 5129 } else { 5130 temp = data = RREG32(mmRLC_PG_CNTL); 5131 data &= ~0x100000; 5132 5133 if (temp != data) 5134 WREG32(mmRLC_PG_CNTL, data); 5135 } 5136 } 5137 5138 static int gfx_v8_0_set_powergating_state(void *handle, 5139 enum amd_powergating_state state) 5140 { 5141 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5142 5143 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) 5144 return 0; 5145 5146 switch (adev->asic_type) { 5147 case CHIP_POLARIS11: 5148 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) 5149 polaris11_enable_gfx_static_mg_power_gating(adev, 5150 state == AMD_PG_STATE_GATE ? true : false); 5151 else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) 5152 polaris11_enable_gfx_dynamic_mg_power_gating(adev, 5153 state == AMD_PG_STATE_GATE ? true : false); 5154 else 5155 polaris11_enable_gfx_quick_mg_power_gating(adev, 5156 state == AMD_PG_STATE_GATE ? true : false); 5157 break; 5158 default: 5159 break; 5160 } 5161 5162 return 0; 5163 } 5164 5165 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5166 uint32_t reg_addr, uint32_t cmd) 5167 { 5168 uint32_t data; 5169 5170 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 5171 5172 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5173 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5174 5175 data = RREG32(mmRLC_SERDES_WR_CTRL); 5176 if (adev->asic_type == CHIP_STONEY) 5177 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5178 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5179 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5180 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5181 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5182 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5183 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5184 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5185 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5186 else 5187 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5188 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5189 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5190 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5191 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5192 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5193 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5194 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5195 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5196 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5197 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5198 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5199 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5200 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5201 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5202 5203 WREG32(mmRLC_SERDES_WR_CTRL, data); 5204 } 5205 5206 #define MSG_ENTER_RLC_SAFE_MODE 1 5207 #define MSG_EXIT_RLC_SAFE_MODE 0 5208 5209 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5210 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5211 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5212 5213 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev) 5214 { 5215 u32 data = 0; 5216 unsigned i; 5217 5218 data = RREG32(mmRLC_CNTL); 5219 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) 5220 return; 5221 5222 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || 5223 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | 5224 AMD_PG_SUPPORT_GFX_DMG))) { 5225 data |= RLC_GPR_REG2__REQ_MASK; 5226 data &= ~RLC_GPR_REG2__MESSAGE_MASK; 5227 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); 5228 WREG32(mmRLC_GPR_REG2, data); 5229 5230 for (i = 0; i < adev->usec_timeout; i++) { 5231 if ((RREG32(mmRLC_GPM_STAT) & 5232 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5233 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5234 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5235 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5236 break; 5237 udelay(1); 5238 } 5239 5240 for (i = 0; i < adev->usec_timeout; i++) { 5241 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0) 5242 break; 5243 udelay(1); 5244 } 5245 adev->gfx.rlc.in_safe_mode = true; 5246 } 5247 } 5248 5249 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev) 5250 { 5251 u32 data; 5252 unsigned i; 5253 5254 data = RREG32(mmRLC_CNTL); 5255 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) 5256 return; 5257 5258 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || 5259 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | 5260 AMD_PG_SUPPORT_GFX_DMG))) { 5261 data |= RLC_GPR_REG2__REQ_MASK; 5262 data &= ~RLC_GPR_REG2__MESSAGE_MASK; 5263 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); 5264 WREG32(mmRLC_GPR_REG2, data); 5265 adev->gfx.rlc.in_safe_mode = false; 5266 } 5267 5268 for (i = 0; i < adev->usec_timeout; i++) { 5269 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0) 5270 break; 5271 udelay(1); 5272 } 5273 } 5274 5275 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5276 { 5277 u32 data; 5278 unsigned i; 5279 5280 data = RREG32(mmRLC_CNTL); 5281 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5282 return; 5283 5284 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5285 data |= RLC_SAFE_MODE__CMD_MASK; 5286 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5287 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5288 WREG32(mmRLC_SAFE_MODE, data); 5289 5290 for (i = 0; i < adev->usec_timeout; i++) { 5291 if ((RREG32(mmRLC_GPM_STAT) & 5292 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5293 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5294 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5295 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5296 break; 5297 udelay(1); 5298 } 5299 5300 for (i = 0; i < adev->usec_timeout; i++) { 5301 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0) 5302 break; 5303 udelay(1); 5304 } 5305 adev->gfx.rlc.in_safe_mode = true; 5306 } 5307 } 5308 5309 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5310 { 5311 u32 data = 0; 5312 unsigned i; 5313 5314 data = RREG32(mmRLC_CNTL); 5315 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5316 return; 5317 5318 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5319 if (adev->gfx.rlc.in_safe_mode) { 5320 data |= RLC_SAFE_MODE__CMD_MASK; 5321 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5322 WREG32(mmRLC_SAFE_MODE, data); 5323 adev->gfx.rlc.in_safe_mode = false; 5324 } 5325 } 5326 5327 for (i = 0; i < adev->usec_timeout; i++) { 5328 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0) 5329 break; 5330 udelay(1); 5331 } 5332 } 5333 5334 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev) 5335 { 5336 adev->gfx.rlc.in_safe_mode = true; 5337 } 5338 5339 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev) 5340 { 5341 adev->gfx.rlc.in_safe_mode = false; 5342 } 5343 5344 static const struct amdgpu_rlc_funcs cz_rlc_funcs = { 5345 .enter_safe_mode = cz_enter_rlc_safe_mode, 5346 .exit_safe_mode = cz_exit_rlc_safe_mode 5347 }; 5348 5349 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5350 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5351 .exit_safe_mode = iceland_exit_rlc_safe_mode 5352 }; 5353 5354 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = { 5355 .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode, 5356 .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode 5357 }; 5358 5359 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5360 bool enable) 5361 { 5362 uint32_t temp, data; 5363 5364 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5365 5366 /* It is disabled by HW by default */ 5367 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5368 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5369 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 5370 /* 1 - RLC memory Light sleep */ 5371 temp = data = RREG32(mmRLC_MEM_SLP_CNTL); 5372 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5373 if (temp != data) 5374 WREG32(mmRLC_MEM_SLP_CNTL, data); 5375 } 5376 5377 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 5378 /* 2 - CP memory Light sleep */ 5379 temp = data = RREG32(mmCP_MEM_SLP_CNTL); 5380 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5381 if (temp != data) 5382 WREG32(mmCP_MEM_SLP_CNTL, data); 5383 } 5384 } 5385 5386 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5387 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5388 if (adev->flags & AMD_IS_APU) 5389 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5390 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5391 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5392 else 5393 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5394 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5395 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5396 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5397 5398 if (temp != data) 5399 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5400 5401 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5402 gfx_v8_0_wait_for_rlc_serdes(adev); 5403 5404 /* 5 - clear mgcg override */ 5405 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5406 5407 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5408 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5409 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5410 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5411 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5412 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5413 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5414 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5415 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5416 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5417 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5418 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5419 if (temp != data) 5420 WREG32(mmCGTS_SM_CTRL_REG, data); 5421 } 5422 udelay(50); 5423 5424 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5425 gfx_v8_0_wait_for_rlc_serdes(adev); 5426 } else { 5427 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5428 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5429 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5430 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5431 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5432 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5433 if (temp != data) 5434 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5435 5436 /* 2 - disable MGLS in RLC */ 5437 data = RREG32(mmRLC_MEM_SLP_CNTL); 5438 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5439 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5440 WREG32(mmRLC_MEM_SLP_CNTL, data); 5441 } 5442 5443 /* 3 - disable MGLS in CP */ 5444 data = RREG32(mmCP_MEM_SLP_CNTL); 5445 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5446 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5447 WREG32(mmCP_MEM_SLP_CNTL, data); 5448 } 5449 5450 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5451 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5452 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5453 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5454 if (temp != data) 5455 WREG32(mmCGTS_SM_CTRL_REG, data); 5456 5457 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5458 gfx_v8_0_wait_for_rlc_serdes(adev); 5459 5460 /* 6 - set mgcg override */ 5461 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5462 5463 udelay(50); 5464 5465 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5466 gfx_v8_0_wait_for_rlc_serdes(adev); 5467 } 5468 5469 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5470 } 5471 5472 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5473 bool enable) 5474 { 5475 uint32_t temp, temp1, data, data1; 5476 5477 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5478 5479 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5480 5481 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5482 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/ 5483 * Cmp_busy/GFX_Idle interrupts 5484 */ 5485 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5486 5487 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5488 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5489 if (temp1 != data1) 5490 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5491 5492 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5493 gfx_v8_0_wait_for_rlc_serdes(adev); 5494 5495 /* 3 - clear cgcg override */ 5496 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5497 5498 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5499 gfx_v8_0_wait_for_rlc_serdes(adev); 5500 5501 /* 4 - write cmd to set CGLS */ 5502 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 5503 5504 /* 5 - enable cgcg */ 5505 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 5506 5507 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5508 /* enable cgls*/ 5509 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5510 5511 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5512 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 5513 5514 if (temp1 != data1) 5515 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5516 } else { 5517 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 5518 } 5519 5520 if (temp != data) 5521 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5522 } else { 5523 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 5524 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5525 5526 /* TEST CGCG */ 5527 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5528 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 5529 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 5530 if (temp1 != data1) 5531 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 5532 5533 /* read gfx register to wake up cgcg */ 5534 RREG32(mmCB_CGTT_SCLK_CTRL); 5535 RREG32(mmCB_CGTT_SCLK_CTRL); 5536 RREG32(mmCB_CGTT_SCLK_CTRL); 5537 RREG32(mmCB_CGTT_SCLK_CTRL); 5538 5539 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5540 gfx_v8_0_wait_for_rlc_serdes(adev); 5541 5542 /* write cmd to Set CGCG Overrride */ 5543 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5544 5545 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5546 gfx_v8_0_wait_for_rlc_serdes(adev); 5547 5548 /* write cmd to Clear CGLS */ 5549 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 5550 5551 /* disable cgcg, cgls should be disabled too. */ 5552 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 5553 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5554 if (temp != data) 5555 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5556 } 5557 5558 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5559 } 5560 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5561 bool enable) 5562 { 5563 if (enable) { 5564 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 5565 * === MGCG + MGLS + TS(CG/LS) === 5566 */ 5567 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5568 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5569 } else { 5570 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 5571 * === CGCG + CGLS === 5572 */ 5573 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5574 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5575 } 5576 return 0; 5577 } 5578 5579 static int gfx_v8_0_set_clockgating_state(void *handle, 5580 enum amd_clockgating_state state) 5581 { 5582 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5583 5584 switch (adev->asic_type) { 5585 case CHIP_FIJI: 5586 case CHIP_CARRIZO: 5587 case CHIP_STONEY: 5588 gfx_v8_0_update_gfx_clock_gating(adev, 5589 state == AMD_CG_STATE_GATE ? true : false); 5590 break; 5591 default: 5592 break; 5593 } 5594 return 0; 5595 } 5596 5597 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) 5598 { 5599 u32 rptr; 5600 5601 rptr = ring->adev->wb.wb[ring->rptr_offs]; 5602 5603 return rptr; 5604 } 5605 5606 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5607 { 5608 struct amdgpu_device *adev = ring->adev; 5609 u32 wptr; 5610 5611 if (ring->use_doorbell) 5612 /* XXX check if swapping is necessary on BE */ 5613 wptr = ring->adev->wb.wb[ring->wptr_offs]; 5614 else 5615 wptr = RREG32(mmCP_RB0_WPTR); 5616 5617 return wptr; 5618 } 5619 5620 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5621 { 5622 struct amdgpu_device *adev = ring->adev; 5623 5624 if (ring->use_doorbell) { 5625 /* XXX check if swapping is necessary on BE */ 5626 adev->wb.wb[ring->wptr_offs] = ring->wptr; 5627 WDOORBELL32(ring->doorbell_index, ring->wptr); 5628 } else { 5629 WREG32(mmCP_RB0_WPTR, ring->wptr); 5630 (void)RREG32(mmCP_RB0_WPTR); 5631 } 5632 } 5633 5634 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5635 { 5636 u32 ref_and_mask, reg_mem_engine; 5637 5638 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) { 5639 switch (ring->me) { 5640 case 1: 5641 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 5642 break; 5643 case 2: 5644 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 5645 break; 5646 default: 5647 return; 5648 } 5649 reg_mem_engine = 0; 5650 } else { 5651 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 5652 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 5653 } 5654 5655 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 5656 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 5657 WAIT_REG_MEM_FUNCTION(3) | /* == */ 5658 reg_mem_engine)); 5659 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 5660 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 5661 amdgpu_ring_write(ring, ref_and_mask); 5662 amdgpu_ring_write(ring, ref_and_mask); 5663 amdgpu_ring_write(ring, 0x20); /* poll interval */ 5664 } 5665 5666 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) 5667 { 5668 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5669 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5670 WRITE_DATA_DST_SEL(0) | 5671 WR_CONFIRM)); 5672 amdgpu_ring_write(ring, mmHDP_DEBUG0); 5673 amdgpu_ring_write(ring, 0); 5674 amdgpu_ring_write(ring, 1); 5675 5676 } 5677 5678 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 5679 struct amdgpu_ib *ib, 5680 unsigned vm_id, bool ctx_switch) 5681 { 5682 u32 header, control = 0; 5683 u32 next_rptr = ring->wptr + 5; 5684 5685 if (ctx_switch) 5686 next_rptr += 2; 5687 5688 next_rptr += 4; 5689 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5690 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 5691 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 5692 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 5693 amdgpu_ring_write(ring, next_rptr); 5694 5695 /* insert SWITCH_BUFFER packet before first IB in the ring frame */ 5696 if (ctx_switch) { 5697 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5698 amdgpu_ring_write(ring, 0); 5699 } 5700 5701 if (ib->flags & AMDGPU_IB_FLAG_CE) 5702 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 5703 else 5704 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5705 5706 control |= ib->length_dw | (vm_id << 24); 5707 5708 amdgpu_ring_write(ring, header); 5709 amdgpu_ring_write(ring, 5710 #ifdef __BIG_ENDIAN 5711 (2 << 0) | 5712 #endif 5713 (ib->gpu_addr & 0xFFFFFFFC)); 5714 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 5715 amdgpu_ring_write(ring, control); 5716 } 5717 5718 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 5719 struct amdgpu_ib *ib, 5720 unsigned vm_id, bool ctx_switch) 5721 { 5722 u32 header, control = 0; 5723 u32 next_rptr = ring->wptr + 5; 5724 5725 control |= INDIRECT_BUFFER_VALID; 5726 5727 next_rptr += 4; 5728 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5729 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); 5730 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 5731 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); 5732 amdgpu_ring_write(ring, next_rptr); 5733 5734 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 5735 5736 control |= ib->length_dw | (vm_id << 24); 5737 5738 amdgpu_ring_write(ring, header); 5739 amdgpu_ring_write(ring, 5740 #ifdef __BIG_ENDIAN 5741 (2 << 0) | 5742 #endif 5743 (ib->gpu_addr & 0xFFFFFFFC)); 5744 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 5745 amdgpu_ring_write(ring, control); 5746 } 5747 5748 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 5749 u64 seq, unsigned flags) 5750 { 5751 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5752 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5753 5754 /* EVENT_WRITE_EOP - flush caches, send int */ 5755 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 5756 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 5757 EOP_TC_ACTION_EN | 5758 EOP_TC_WB_ACTION_EN | 5759 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5760 EVENT_INDEX(5))); 5761 amdgpu_ring_write(ring, addr & 0xfffffffc); 5762 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 5763 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 5764 amdgpu_ring_write(ring, lower_32_bits(seq)); 5765 amdgpu_ring_write(ring, upper_32_bits(seq)); 5766 5767 } 5768 5769 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 5770 { 5771 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); 5772 uint32_t seq = ring->fence_drv.sync_seq; 5773 uint64_t addr = ring->fence_drv.gpu_addr; 5774 5775 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 5776 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 5777 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 5778 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 5779 amdgpu_ring_write(ring, addr & 0xfffffffc); 5780 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 5781 amdgpu_ring_write(ring, seq); 5782 amdgpu_ring_write(ring, 0xffffffff); 5783 amdgpu_ring_write(ring, 4); /* poll interval */ 5784 5785 if (usepfp) { 5786 /* synce CE with ME to prevent CE fetch CEIB before context switch done */ 5787 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5788 amdgpu_ring_write(ring, 0); 5789 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5790 amdgpu_ring_write(ring, 0); 5791 } 5792 } 5793 5794 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 5795 unsigned vm_id, uint64_t pd_addr) 5796 { 5797 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); 5798 5799 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5800 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) | 5801 WRITE_DATA_DST_SEL(0)) | 5802 WR_CONFIRM); 5803 if (vm_id < 8) { 5804 amdgpu_ring_write(ring, 5805 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id)); 5806 } else { 5807 amdgpu_ring_write(ring, 5808 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8)); 5809 } 5810 amdgpu_ring_write(ring, 0); 5811 amdgpu_ring_write(ring, pd_addr >> 12); 5812 5813 /* bits 0-15 are the VM contexts0-15 */ 5814 /* invalidate the cache */ 5815 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5816 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5817 WRITE_DATA_DST_SEL(0))); 5818 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 5819 amdgpu_ring_write(ring, 0); 5820 amdgpu_ring_write(ring, 1 << vm_id); 5821 5822 /* wait for the invalidate to complete */ 5823 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 5824 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 5825 WAIT_REG_MEM_FUNCTION(0) | /* always */ 5826 WAIT_REG_MEM_ENGINE(0))); /* me */ 5827 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 5828 amdgpu_ring_write(ring, 0); 5829 amdgpu_ring_write(ring, 0); /* ref */ 5830 amdgpu_ring_write(ring, 0); /* mask */ 5831 amdgpu_ring_write(ring, 0x20); /* poll interval */ 5832 5833 /* compute doesn't have PFP */ 5834 if (usepfp) { 5835 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 5836 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 5837 amdgpu_ring_write(ring, 0x0); 5838 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5839 amdgpu_ring_write(ring, 0); 5840 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 5841 amdgpu_ring_write(ring, 0); 5842 } 5843 } 5844 5845 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 5846 { 5847 return ring->adev->wb.wb[ring->rptr_offs]; 5848 } 5849 5850 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 5851 { 5852 return ring->adev->wb.wb[ring->wptr_offs]; 5853 } 5854 5855 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 5856 { 5857 struct amdgpu_device *adev = ring->adev; 5858 5859 /* XXX check if swapping is necessary on BE */ 5860 adev->wb.wb[ring->wptr_offs] = ring->wptr; 5861 WDOORBELL32(ring->doorbell_index, ring->wptr); 5862 } 5863 5864 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 5865 u64 addr, u64 seq, 5866 unsigned flags) 5867 { 5868 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 5869 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 5870 5871 /* RELEASE_MEM - flush caches, send int */ 5872 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 5873 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 5874 EOP_TC_ACTION_EN | 5875 EOP_TC_WB_ACTION_EN | 5876 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 5877 EVENT_INDEX(5))); 5878 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 5879 amdgpu_ring_write(ring, addr & 0xfffffffc); 5880 amdgpu_ring_write(ring, upper_32_bits(addr)); 5881 amdgpu_ring_write(ring, lower_32_bits(seq)); 5882 amdgpu_ring_write(ring, upper_32_bits(seq)); 5883 } 5884 5885 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 5886 enum amdgpu_interrupt_state state) 5887 { 5888 u32 cp_int_cntl; 5889 5890 switch (state) { 5891 case AMDGPU_IRQ_STATE_DISABLE: 5892 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5893 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5894 TIME_STAMP_INT_ENABLE, 0); 5895 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5896 break; 5897 case AMDGPU_IRQ_STATE_ENABLE: 5898 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5899 cp_int_cntl = 5900 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5901 TIME_STAMP_INT_ENABLE, 1); 5902 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5903 break; 5904 default: 5905 break; 5906 } 5907 } 5908 5909 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 5910 int me, int pipe, 5911 enum amdgpu_interrupt_state state) 5912 { 5913 u32 mec_int_cntl, mec_int_cntl_reg; 5914 5915 /* 5916 * amdgpu controls only pipe 0 of MEC1. That's why this function only 5917 * handles the setting of interrupts for this specific pipe. All other 5918 * pipes' interrupts are set by amdkfd. 5919 */ 5920 5921 if (me == 1) { 5922 switch (pipe) { 5923 case 0: 5924 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; 5925 break; 5926 default: 5927 DRM_DEBUG("invalid pipe %d\n", pipe); 5928 return; 5929 } 5930 } else { 5931 DRM_DEBUG("invalid me %d\n", me); 5932 return; 5933 } 5934 5935 switch (state) { 5936 case AMDGPU_IRQ_STATE_DISABLE: 5937 mec_int_cntl = RREG32(mec_int_cntl_reg); 5938 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5939 TIME_STAMP_INT_ENABLE, 0); 5940 WREG32(mec_int_cntl_reg, mec_int_cntl); 5941 break; 5942 case AMDGPU_IRQ_STATE_ENABLE: 5943 mec_int_cntl = RREG32(mec_int_cntl_reg); 5944 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, 5945 TIME_STAMP_INT_ENABLE, 1); 5946 WREG32(mec_int_cntl_reg, mec_int_cntl); 5947 break; 5948 default: 5949 break; 5950 } 5951 } 5952 5953 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 5954 struct amdgpu_irq_src *source, 5955 unsigned type, 5956 enum amdgpu_interrupt_state state) 5957 { 5958 u32 cp_int_cntl; 5959 5960 switch (state) { 5961 case AMDGPU_IRQ_STATE_DISABLE: 5962 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5963 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5964 PRIV_REG_INT_ENABLE, 0); 5965 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5966 break; 5967 case AMDGPU_IRQ_STATE_ENABLE: 5968 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5969 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5970 PRIV_REG_INT_ENABLE, 1); 5971 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5972 break; 5973 default: 5974 break; 5975 } 5976 5977 return 0; 5978 } 5979 5980 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 5981 struct amdgpu_irq_src *source, 5982 unsigned type, 5983 enum amdgpu_interrupt_state state) 5984 { 5985 u32 cp_int_cntl; 5986 5987 switch (state) { 5988 case AMDGPU_IRQ_STATE_DISABLE: 5989 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5990 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5991 PRIV_INSTR_INT_ENABLE, 0); 5992 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5993 break; 5994 case AMDGPU_IRQ_STATE_ENABLE: 5995 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0); 5996 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0, 5997 PRIV_INSTR_INT_ENABLE, 1); 5998 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl); 5999 break; 6000 default: 6001 break; 6002 } 6003 6004 return 0; 6005 } 6006 6007 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6008 struct amdgpu_irq_src *src, 6009 unsigned type, 6010 enum amdgpu_interrupt_state state) 6011 { 6012 switch (type) { 6013 case AMDGPU_CP_IRQ_GFX_EOP: 6014 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 6015 break; 6016 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6017 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6018 break; 6019 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6020 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6021 break; 6022 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6023 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6024 break; 6025 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6026 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6027 break; 6028 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6029 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6030 break; 6031 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6032 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6033 break; 6034 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6035 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6036 break; 6037 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6038 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6039 break; 6040 default: 6041 break; 6042 } 6043 return 0; 6044 } 6045 6046 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6047 struct amdgpu_irq_src *source, 6048 struct amdgpu_iv_entry *entry) 6049 { 6050 int i; 6051 u8 me_id, pipe_id, queue_id; 6052 struct amdgpu_ring *ring; 6053 6054 DRM_DEBUG("IH: CP EOP\n"); 6055 me_id = (entry->ring_id & 0x0c) >> 2; 6056 pipe_id = (entry->ring_id & 0x03) >> 0; 6057 queue_id = (entry->ring_id & 0x70) >> 4; 6058 6059 switch (me_id) { 6060 case 0: 6061 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6062 break; 6063 case 1: 6064 case 2: 6065 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6066 ring = &adev->gfx.compute_ring[i]; 6067 /* Per-queue interrupt is supported for MEC starting from VI. 6068 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6069 */ 6070 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6071 amdgpu_fence_process(ring); 6072 } 6073 break; 6074 } 6075 return 0; 6076 } 6077 6078 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 6079 struct amdgpu_irq_src *source, 6080 struct amdgpu_iv_entry *entry) 6081 { 6082 DRM_ERROR("Illegal register access in command stream\n"); 6083 schedule_work(&adev->reset_work); 6084 return 0; 6085 } 6086 6087 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, 6088 struct amdgpu_irq_src *source, 6089 struct amdgpu_iv_entry *entry) 6090 { 6091 DRM_ERROR("Illegal instruction in command stream\n"); 6092 schedule_work(&adev->reset_work); 6093 return 0; 6094 } 6095 6096 const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 6097 .name = "gfx_v8_0", 6098 .early_init = gfx_v8_0_early_init, 6099 .late_init = gfx_v8_0_late_init, 6100 .sw_init = gfx_v8_0_sw_init, 6101 .sw_fini = gfx_v8_0_sw_fini, 6102 .hw_init = gfx_v8_0_hw_init, 6103 .hw_fini = gfx_v8_0_hw_fini, 6104 .suspend = gfx_v8_0_suspend, 6105 .resume = gfx_v8_0_resume, 6106 .is_idle = gfx_v8_0_is_idle, 6107 .wait_for_idle = gfx_v8_0_wait_for_idle, 6108 .soft_reset = gfx_v8_0_soft_reset, 6109 .set_clockgating_state = gfx_v8_0_set_clockgating_state, 6110 .set_powergating_state = gfx_v8_0_set_powergating_state, 6111 }; 6112 6113 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 6114 .get_rptr = gfx_v8_0_ring_get_rptr_gfx, 6115 .get_wptr = gfx_v8_0_ring_get_wptr_gfx, 6116 .set_wptr = gfx_v8_0_ring_set_wptr_gfx, 6117 .parse_cs = NULL, 6118 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 6119 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 6120 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 6121 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 6122 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 6123 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 6124 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 6125 .test_ring = gfx_v8_0_ring_test_ring, 6126 .test_ib = gfx_v8_0_ring_test_ib, 6127 .insert_nop = amdgpu_ring_insert_nop, 6128 .pad_ib = amdgpu_ring_generic_pad_ib, 6129 }; 6130 6131 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 6132 .get_rptr = gfx_v8_0_ring_get_rptr_compute, 6133 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 6134 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 6135 .parse_cs = NULL, 6136 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 6137 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 6138 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 6139 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 6140 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 6141 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 6142 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, 6143 .test_ring = gfx_v8_0_ring_test_ring, 6144 .test_ib = gfx_v8_0_ring_test_ib, 6145 .insert_nop = amdgpu_ring_insert_nop, 6146 .pad_ib = amdgpu_ring_generic_pad_ib, 6147 }; 6148 6149 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 6150 { 6151 int i; 6152 6153 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 6154 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 6155 6156 for (i = 0; i < adev->gfx.num_compute_rings; i++) 6157 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 6158 } 6159 6160 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 6161 .set = gfx_v8_0_set_eop_interrupt_state, 6162 .process = gfx_v8_0_eop_irq, 6163 }; 6164 6165 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 6166 .set = gfx_v8_0_set_priv_reg_fault_state, 6167 .process = gfx_v8_0_priv_reg_irq, 6168 }; 6169 6170 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 6171 .set = gfx_v8_0_set_priv_inst_fault_state, 6172 .process = gfx_v8_0_priv_inst_irq, 6173 }; 6174 6175 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 6176 { 6177 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 6178 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 6179 6180 adev->gfx.priv_reg_irq.num_types = 1; 6181 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 6182 6183 adev->gfx.priv_inst_irq.num_types = 1; 6184 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 6185 } 6186 6187 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 6188 { 6189 switch (adev->asic_type) { 6190 case CHIP_TOPAZ: 6191 case CHIP_STONEY: 6192 adev->gfx.rlc.funcs = &iceland_rlc_funcs; 6193 break; 6194 case CHIP_CARRIZO: 6195 adev->gfx.rlc.funcs = &cz_rlc_funcs; 6196 break; 6197 default: 6198 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs; 6199 break; 6200 } 6201 } 6202 6203 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 6204 { 6205 /* init asci gds info */ 6206 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 6207 adev->gds.gws.total_size = 64; 6208 adev->gds.oa.total_size = 16; 6209 6210 if (adev->gds.mem.total_size == 64 * 1024) { 6211 adev->gds.mem.gfx_partition_size = 4096; 6212 adev->gds.mem.cs_partition_size = 4096; 6213 6214 adev->gds.gws.gfx_partition_size = 4; 6215 adev->gds.gws.cs_partition_size = 4; 6216 6217 adev->gds.oa.gfx_partition_size = 4; 6218 adev->gds.oa.cs_partition_size = 1; 6219 } else { 6220 adev->gds.mem.gfx_partition_size = 1024; 6221 adev->gds.mem.cs_partition_size = 1024; 6222 6223 adev->gds.gws.gfx_partition_size = 16; 6224 adev->gds.gws.cs_partition_size = 16; 6225 6226 adev->gds.oa.gfx_partition_size = 4; 6227 adev->gds.oa.cs_partition_size = 4; 6228 } 6229 } 6230 6231 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) 6232 { 6233 u32 data, mask; 6234 6235 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); 6236 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 6237 6238 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 6239 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 6240 6241 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh); 6242 6243 return (~data) & mask; 6244 } 6245 6246 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) 6247 { 6248 int i, j, k, counter, active_cu_number = 0; 6249 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 6250 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 6251 6252 memset(cu_info, 0, sizeof(*cu_info)); 6253 6254 mutex_lock(&adev->grbm_idx_mutex); 6255 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 6256 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 6257 mask = 1; 6258 ao_bitmap = 0; 6259 counter = 0; 6260 gfx_v8_0_select_se_sh(adev, i, j); 6261 bitmap = gfx_v8_0_get_cu_active_bitmap(adev); 6262 cu_info->bitmap[i][j] = bitmap; 6263 6264 for (k = 0; k < 16; k ++) { 6265 if (bitmap & mask) { 6266 if (counter < 2) 6267 ao_bitmap |= mask; 6268 counter ++; 6269 } 6270 mask <<= 1; 6271 } 6272 active_cu_number += counter; 6273 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 6274 } 6275 } 6276 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); 6277 mutex_unlock(&adev->grbm_idx_mutex); 6278 6279 cu_info->number = active_cu_number; 6280 cu_info->ao_cu_mask = ao_cu_mask; 6281 } 6282