1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 */ 23 #include <linux/firmware.h> 24 #include "drmP.h" 25 #include "amdgpu.h" 26 #include "amdgpu_gfx.h" 27 #include "vi.h" 28 #include "vi_structs.h" 29 #include "vid.h" 30 #include "amdgpu_ucode.h" 31 #include "amdgpu_atombios.h" 32 #include "atombios_i2c.h" 33 #include "clearstate_vi.h" 34 35 #include "gmc/gmc_8_2_d.h" 36 #include "gmc/gmc_8_2_sh_mask.h" 37 38 #include "oss/oss_3_0_d.h" 39 #include "oss/oss_3_0_sh_mask.h" 40 41 #include "bif/bif_5_0_d.h" 42 #include "bif/bif_5_0_sh_mask.h" 43 44 #include "gca/gfx_8_0_d.h" 45 #include "gca/gfx_8_0_enum.h" 46 #include "gca/gfx_8_0_sh_mask.h" 47 #include "gca/gfx_8_0_enum.h" 48 49 #include "dce/dce_10_0_d.h" 50 #include "dce/dce_10_0_sh_mask.h" 51 52 #include "smu/smu_7_1_3_d.h" 53 54 #define GFX8_NUM_GFX_RINGS 1 55 #define GFX8_NUM_COMPUTE_RINGS 8 56 57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 61 62 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) 63 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) 64 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) 65 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) 66 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT) 67 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT) 68 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT) 69 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT) 70 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT) 71 72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L 73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L 74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L 75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L 76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L 77 #define 
RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L 78 79 /* BPM SERDES CMD */ 80 #define SET_BPM_SERDES_CMD 1 81 #define CLE_BPM_SERDES_CMD 0 82 83 /* BPM Register Address*/ 84 enum { 85 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */ 86 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */ 87 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */ 88 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 89 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 90 BPM_REG_FGCG_MAX 91 }; 92 93 #define RLC_FormatDirectRegListLength 14 94 95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); 96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); 97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); 98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin"); 99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 101 102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin"); 103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin"); 104 MODULE_FIRMWARE("amdgpu/stoney_me.bin"); 105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin"); 106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin"); 107 108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 110 MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin"); 112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin"); 113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin"); 114 115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); 116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); 117 MODULE_FIRMWARE("amdgpu/topaz_me.bin"); 118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); 119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); 120 121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); 122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin"); 123 MODULE_FIRMWARE("amdgpu/fiji_me.bin"); 124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 127 128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); 129 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); 130 MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); 131 
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

/* Per-VMID GDS register offsets: one {base, size, GWS, OA} tuple per VMID 0..15. */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

/*
 * Golden register tables below are flat triplets consumed by
 * amdgpu_program_register_sequence() in gfx_v8_0_init_golden_registers():
 * {register offset, mask, value}.
 * NOTE(review): exact mask semantics (AND-keep vs AND-clear) are defined by
 * amdgpu_program_register_sequence() — confirm there before editing values.
 */

/* Tonga (a11) golden register fixups. */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Tonga common golden settings (raster config, addr config, SPI reserves). */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Tonga MGCG/CGCG clock-gating init sequence, including per-CU CGTS setup. */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

/* Polaris11 (a11) golden register fixups. */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Polaris11 common golden settings. */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Polaris10 (a11) golden register fixups. */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Polaris10 common golden settings. */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Fiji common golden settings. */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

/* Fiji (a10) golden register fixups. */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

/* Fiji MGCG/CGCG clock-gating init sequence (no per-CU CGTS entries). */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

/* Iceland/Topaz (a11) golden register fixups. */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

/* Iceland/Topaz common golden settings. */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Iceland/Topaz MGCG/CGCG clock-gating init (CU0..CU5 only). */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

/* Carrizo (a11) golden register fixups. */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

/* Carrizo common golden settings. */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

/* Carrizo MGCG/CGCG clock-gating init, including per-CU CGTS setup (CU0..CU7). */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

/* Stoney (a11) golden register fixups. */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

/* Stoney common golden settings. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

/* Stoney MGCG/CGCG clock-gating init (short sequence). */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};

/* Forward declarations for callbacks defined later in this file. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr);
static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev);
static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev);

/*
 * gfx_v8_0_init_golden_registers - program the per-ASIC "golden" register
 * values (MGCG/CGCG init, golden settings, common settings) via
 * amdgpu_program_register_sequence().  Unknown ASICs are silently skipped.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* Polaris12 intentionally shares the Polaris11 tables. */
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/*
		 * Board-specific quirk: for certain Polaris10 boards (matched
		 * by PCI revision + subsystem IDs) issue extra i2c writes —
		 * presumably a VBIOS/board fixup; confirm against hardware docs.
		 */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

/*
 * gfx_v8_0_scratch_init - describe the pool of CP scratch registers:
 * 7 registers starting at mmSCRATCH_REG0, all initially free (one bit
 * per register in free_mask).
 */
static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

/*
 * gfx_v8_0_ring_test_ring - basic ring sanity test.
 *
 * Seeds a scratch register with 0xCAFEDEAD, then emits a 3-dword
 * SET_UCONFIG_REG packet that writes 0xDEADBEEF to it, and polls
 * (up to adev->usec_timeout microseconds) until the value lands.
 * Returns 0 on success, negative errno on failure.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

/*
 * gfx_v8_0_ring_test_ib - same check as the ring test, but driven through
 * an indirect buffer: the SET_UCONFIG_REG write is placed in a 3-dword IB,
 * scheduled on the ring, and completion is detected by waiting on the
 * returned fence (bounded by @timeout) before reading the scratch register
 * back.  Returns 0 on success, negative errno on failure/timeout.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}


/*
 * gfx_v8_0_free_microcode - drop all GFX firmware references and the RLC
 * register-list buffer.  mec2_fw is released only for ASICs other than
 * Stoney/Topaz (presumably those never load MEC2 — see init_microcode);
 * the pointer is cleared unconditionally either way.
 */
static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0
*cp_hdr; 889 const struct rlc_firmware_header_v2_0 *rlc_hdr; 890 unsigned int *tmp = NULL, i; 891 892 DRM_DEBUG("\n"); 893 894 switch (adev->asic_type) { 895 case CHIP_TOPAZ: 896 chip_name = "topaz"; 897 break; 898 case CHIP_TONGA: 899 chip_name = "tonga"; 900 break; 901 case CHIP_CARRIZO: 902 chip_name = "carrizo"; 903 break; 904 case CHIP_FIJI: 905 chip_name = "fiji"; 906 break; 907 case CHIP_POLARIS11: 908 chip_name = "polaris11"; 909 break; 910 case CHIP_POLARIS10: 911 chip_name = "polaris10"; 912 break; 913 case CHIP_POLARIS12: 914 chip_name = "polaris12"; 915 break; 916 case CHIP_STONEY: 917 chip_name = "stoney"; 918 break; 919 default: 920 BUG(); 921 } 922 923 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 924 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 925 if (err) 926 goto out; 927 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 928 if (err) 929 goto out; 930 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 931 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 932 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 933 934 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 935 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 936 if (err) 937 goto out; 938 err = amdgpu_ucode_validate(adev->gfx.me_fw); 939 if (err) 940 goto out; 941 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 942 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 943 944 /* chain ib ucode isn't formal released, just disable it by far 945 * TODO: when ucod ready we should use ucode version to judge if 946 * chain-ib support or not. 
947 */ 948 adev->virt.chained_ib_support = false; 949 950 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 951 952 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 953 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 954 if (err) 955 goto out; 956 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 957 if (err) 958 goto out; 959 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 960 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 961 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 962 963 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 964 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 965 if (err) 966 goto out; 967 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 968 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 969 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 970 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 971 972 adev->gfx.rlc.save_and_restore_offset = 973 le32_to_cpu(rlc_hdr->save_and_restore_offset); 974 adev->gfx.rlc.clear_state_descriptor_offset = 975 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 976 adev->gfx.rlc.avail_scratch_ram_locations = 977 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 978 adev->gfx.rlc.reg_restore_list_size = 979 le32_to_cpu(rlc_hdr->reg_restore_list_size); 980 adev->gfx.rlc.reg_list_format_start = 981 le32_to_cpu(rlc_hdr->reg_list_format_start); 982 adev->gfx.rlc.reg_list_format_separate_start = 983 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 984 adev->gfx.rlc.starting_offsets_start = 985 le32_to_cpu(rlc_hdr->starting_offsets_start); 986 adev->gfx.rlc.reg_list_format_size_bytes = 987 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 988 adev->gfx.rlc.reg_list_size_bytes = 989 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 990 991 adev->gfx.rlc.register_list_format = 992 
kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 993 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 994 995 if (!adev->gfx.rlc.register_list_format) { 996 err = -ENOMEM; 997 goto out; 998 } 999 1000 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1001 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1002 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) 1003 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1004 1005 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1006 1007 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1008 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1009 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 1010 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1011 1012 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1013 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1014 if (err) 1015 goto out; 1016 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1017 if (err) 1018 goto out; 1019 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1020 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1021 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1022 1023 if ((adev->asic_type != CHIP_STONEY) && 1024 (adev->asic_type != CHIP_TOPAZ)) { 1025 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1026 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1027 if (!err) { 1028 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1029 if (err) 1030 goto out; 1031 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1032 adev->gfx.mec2_fw->data; 1033 adev->gfx.mec2_fw_version = 1034 le32_to_cpu(cp_hdr->header.ucode_version); 1035 adev->gfx.mec2_feature_version = 1036 le32_to_cpu(cp_hdr->ucode_feature_version); 1037 } else { 1038 err = 0; 1039 adev->gfx.mec2_fw = NULL; 1040 } 1041 } 1042 1043 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { 1044 info = 
&adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1045 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1046 info->fw = adev->gfx.pfp_fw; 1047 header = (const struct common_firmware_header *)info->fw->data; 1048 adev->firmware.fw_size += 1049 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1050 1051 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1052 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1053 info->fw = adev->gfx.me_fw; 1054 header = (const struct common_firmware_header *)info->fw->data; 1055 adev->firmware.fw_size += 1056 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1057 1058 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1059 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1060 info->fw = adev->gfx.ce_fw; 1061 header = (const struct common_firmware_header *)info->fw->data; 1062 adev->firmware.fw_size += 1063 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1064 1065 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1066 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1067 info->fw = adev->gfx.rlc_fw; 1068 header = (const struct common_firmware_header *)info->fw->data; 1069 adev->firmware.fw_size += 1070 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1071 1072 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1073 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1074 info->fw = adev->gfx.mec_fw; 1075 header = (const struct common_firmware_header *)info->fw->data; 1076 adev->firmware.fw_size += 1077 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1078 1079 /* we need account JT in */ 1080 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1081 adev->firmware.fw_size += 1082 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); 1083 1084 if (amdgpu_sriov_vf(adev)) { 1085 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE]; 1086 info->ucode_id = AMDGPU_UCODE_ID_STORAGE; 1087 info->fw = adev->gfx.mec_fw; 1088 adev->firmware.fw_size += 1089 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE); 1090 } 1091 1092 if 
(adev->gfx.mec2_fw) { 1093 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1094 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1095 info->fw = adev->gfx.mec2_fw; 1096 header = (const struct common_firmware_header *)info->fw->data; 1097 adev->firmware.fw_size += 1098 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1099 } 1100 1101 } 1102 1103 out: 1104 if (err) { 1105 dev_err(adev->dev, 1106 "gfx8: Failed to load firmware \"%s\"\n", 1107 fw_name); 1108 release_firmware(adev->gfx.pfp_fw); 1109 adev->gfx.pfp_fw = NULL; 1110 release_firmware(adev->gfx.me_fw); 1111 adev->gfx.me_fw = NULL; 1112 release_firmware(adev->gfx.ce_fw); 1113 adev->gfx.ce_fw = NULL; 1114 release_firmware(adev->gfx.rlc_fw); 1115 adev->gfx.rlc_fw = NULL; 1116 release_firmware(adev->gfx.mec_fw); 1117 adev->gfx.mec_fw = NULL; 1118 release_firmware(adev->gfx.mec2_fw); 1119 adev->gfx.mec2_fw = NULL; 1120 } 1121 return err; 1122 } 1123 1124 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, 1125 volatile u32 *buffer) 1126 { 1127 u32 count = 0, i; 1128 const struct cs_section_def *sect = NULL; 1129 const struct cs_extent_def *ext = NULL; 1130 1131 if (adev->gfx.rlc.cs_data == NULL) 1132 return; 1133 if (buffer == NULL) 1134 return; 1135 1136 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1137 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1138 1139 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1140 buffer[count++] = cpu_to_le32(0x80000000); 1141 buffer[count++] = cpu_to_le32(0x80000000); 1142 1143 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1144 for (ext = sect->section; ext->extent != NULL; ++ext) { 1145 if (sect->id == SECT_CONTEXT) { 1146 buffer[count++] = 1147 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1148 buffer[count++] = cpu_to_le32(ext->reg_index - 1149 PACKET3_SET_CONTEXT_REG_START); 1150 for (i = 0; i < ext->reg_count; i++) 1151 buffer[count++] = 
					/* continuation of gfx_v8_0_get_csb_buffer */
					cpu_to_le32(ext->extent[i]);
			} else {
				/* only SECT_CONTEXT sections are supported */
				return;
			}
		}
	}

	/* raster config for SE0/SH0, then close the clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

/*
 * cz_init_cp_jump_table - concatenate the CP jump tables of each micro
 * engine (me 0..3: CE, PFP, ME, MEC; me 4: MEC2 on Carrizo only) into the
 * RLC cp_table BO, copying jt_size dwords starting at each image's
 * jt_offset.  Caller must have mapped cp_table_ptr.
 */
static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Carrizo also carries a MEC2 jump table */
	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i ++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

/*
 * gfx_v8_0_rlc_fini - tear down the RLC buffer objects (clear-state block
 * and CP jump-table block): unpin, unreserve, and drop the references.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

/*
 * gfx_v8_0_rlc_init - allocate/pin the RLC clear-state BO in VRAM, fill it
 * via gfx_v8_0_get_csb_buffer, and (on Carrizo/Stoney) set up the CP
 * jump-table BO.  (Function continues past this span.)
 */
static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if
		/* continuation of gfx_v8_0_rlc_init: CP jump-table BO setup */
		    (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
					     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
	}

	return 0;
}

/*
 * gfx_v8_0_mec_fini - release the MEC HPD/EOP buffer object.
 */
static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

/*
 * gfx_v8_0_kiq_init_ring - initialize the KIQ ring: allocate a writeback
 * slot for register reads, pick ME2/pipe0 when MEC2 firmware is present
 * (ME1/pipe1 otherwise), and register the ring with the KIQ doorbell.
 * Returns 0 or the error from amdgpu_wb_get/amdgpu_ring_init.
 */
static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_irq_src *irq)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	int r = 0;

	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
	if (r)
		return r;

	ring->adev = NULL;
	ring->ring_obj = NULL;
	ring->use_doorbell = true;
	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
	if (adev->gfx.mec2_fw) {
		ring->me = 2;
		ring->pipe = 0;
	} else {
		ring->me = 1;
		ring->pipe = 1;
	}

	ring->queue = 0;
	ring->eop_gpu_addr = kiq->eop_gpu_addr;
	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
	r = amdgpu_ring_init(adev, ring, 1024,
			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
	if (r)
		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);

	return r;
}

/* gfx_v8_0_kiq_free_ring - undo gfx_v8_0_kiq_init_ring. */
static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
	amdgpu_ring_fini(ring);
}

/* per-queue HPD/EOP buffer size in bytes */
#define MEC_HPD_SIZE 2048

/*
 * gfx_v8_0_mec_init - allocate, pin, and zero the HPD/EOP buffer in GTT
 * for the MEC compute queues owned by the kernel driver.
 */
static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

/* gfx_v8_0_kiq_fini - free the KIQ EOP buffer object. */
static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
}

/*
 * gfx_v8_0_kiq_init - create, zero, and unmap the KIQ EOP buffer.
 * The reserve failure is only warned about, matching the existing
 * best-effort cleanup style here.
 */
static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	memset(hpd, 0, MEC_HPD_SIZE);

	r = amdgpu_bo_reserve(kiq->eop_obj, false);
	if (unlikely(r != 0))
		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
	amdgpu_bo_kunmap(kiq->eop_obj);
	amdgpu_bo_unreserve(kiq->eop_obj);

	return 0;
}

/*
 * Pre-built GPU shader machine code (opaque instruction words) used by the
 * EDC GPR workaround below to touch every VGPR.  Do not modify the values.
 * (Array continues past this span.)
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201,
	/* continuation of vgpr_init_compute_shader (opaque machine code) */
	0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};

/*
 * Opaque shader machine code that touches the SGPRs for the EDC GPR
 * workaround.  Do not modify the values.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

/*
 * Register/value pairs programmed via SET_SH_REG before each EDC dispatch.
 * vgpr_init_regs drives the VGPR-touching shader; sgpr1/sgpr2 each target
 * half of the SEs via COMPUTE_STATIC_THREAD_MGMT_SE0 (0x0f vs 0xf0).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/*
 * EDC (error detection and correction) counter registers read back at the
 * end of the workaround to clear them.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

/*
 * gfx_v8_0_do_edc_gpr_workarounds - Carrizo-only workaround: dispatch
 * compute shaders that initialize every VGPR/SGPR, then enable EDC and
 * clear the error counters.  (Function continues past this span.)
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if
(adev->asic_type != CHIP_CARRIZO) 1665 return 0; 1666 1667 /* bail if the compute ring is not ready */ 1668 if (!ring->ready) 1669 return 0; 1670 1671 tmp = RREG32(mmGB_EDC_MODE); 1672 WREG32(mmGB_EDC_MODE, 0); 1673 1674 total_size = 1675 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1676 total_size += 1677 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1678 total_size += 1679 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1680 total_size = ALIGN(total_size, 256); 1681 vgpr_offset = total_size; 1682 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1683 sgpr_offset = total_size; 1684 total_size += sizeof(sgpr_init_compute_shader); 1685 1686 /* allocate an indirect buffer to put the commands in */ 1687 memset(&ib, 0, sizeof(ib)); 1688 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1689 if (r) { 1690 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1691 return r; 1692 } 1693 1694 /* load the compute shaders */ 1695 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1696 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1697 1698 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1699 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1700 1701 /* init the ib length to 0 */ 1702 ib.length_dw = 0; 1703 1704 /* VGPR */ 1705 /* write the register state for the compute dispatch */ 1706 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1707 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1708 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1709 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1710 } 1711 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1712 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1713 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1714 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1715 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1716 ib.ptr[ib.length_dw++] = 
upper_32_bits(gpu_addr); 1717 1718 /* write dispatch packet */ 1719 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1720 ib.ptr[ib.length_dw++] = 8; /* x */ 1721 ib.ptr[ib.length_dw++] = 1; /* y */ 1722 ib.ptr[ib.length_dw++] = 1; /* z */ 1723 ib.ptr[ib.length_dw++] = 1724 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1725 1726 /* write CS partial flush packet */ 1727 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1728 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1729 1730 /* SGPR1 */ 1731 /* write the register state for the compute dispatch */ 1732 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1733 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1734 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1735 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1736 } 1737 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1738 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1739 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1740 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1741 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1742 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1743 1744 /* write dispatch packet */ 1745 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1746 ib.ptr[ib.length_dw++] = 8; /* x */ 1747 ib.ptr[ib.length_dw++] = 1; /* y */ 1748 ib.ptr[ib.length_dw++] = 1; /* z */ 1749 ib.ptr[ib.length_dw++] = 1750 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1751 1752 /* write CS partial flush packet */ 1753 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1754 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1755 1756 /* SGPR2 */ 1757 /* write the register state for the compute dispatch */ 1758 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1759 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1760 ib.ptr[ib.length_dw++] = 
sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* now that the GPR init shaders have run, re-enable EDC:
	 * GB_EDC_MODE was zeroed before submitting the IB; restore it
	 * with DED_MODE=2 and PROP_FED=1.
	 */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	/* shared exit path: free the IB and drop our fence reference */
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}

/*
 * gfx_v8_0_gpu_early_init - derive the per-ASIC GFX configuration.
 *
 * Fills adev->gfx.config (shader engine / tile pipe / CU / backend counts,
 * scan-converter FIFO sizes, memory row size) from adev->asic_type and,
 * for Carrizo/Stoney, from the PCI revision id (CU count varies per SKU).
 * Polaris parts read their gfx info from the VBIOS via
 * amdgpu_atombios_get_gfx_info() instead of hard-coding it.
 * The final GB_ADDR_CONFIG value (golden setting with ROW_SIZE patched in)
 * is stored in adev->gfx.config.gb_addr_config.
 *
 * Returns 0 on success, or the error from amdgpu_atombios_get_gfx_info().
 */
static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;
	int ret;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		/* engine/CU/backend counts come from the VBIOS on Polaris */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_POLARIS10:
		/* engine/CU/backend counts come from the VBIOS on Polaris */
		ret = amdgpu_atombios_get_gfx_info(adev);
		if (ret)
			return ret;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the Carrizo SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the Stoney SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* discrete parts: derive row size from the MC column count */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;

	return 0;
}

/*
 * gfx_v8_0_sw_init - software-side GFX initialization (IP "sw_init" hook).
 *
 * Registers GFX interrupt sources, loads microcode, allocates RLC/MEC
 * buffers, and creates the gfx and compute rings.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device
*adev = (struct amdgpu_device *)handle;

	/* KIQ event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
			      &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
			      &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	/* microcode must be loaded before RLC/MEC buffer setup below */
	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	if (amdgpu_sriov_vf(adev)) {
		r = gfx_v8_0_kiq_init(adev);
		if (r) {
			DRM_ERROR("Failed to init KIQ BOs!\n");
			return r;
		}

		kiq = &adev->gfx.kiq;
		r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
		if (r)
			return r;

		/* create MQD for all compute queues as well as KIQ for SRIOV case */
		r = gfx_v8_0_compute_mqd_sw_init(adev);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}

/*
 * gfx_v8_0_sw_fini - software-side GFX teardown (IP "sw_fini" hook).
 *
 * Releases everything gfx_v8_0_sw_init() created, in roughly reverse
 * order: GDS/GWS/OA BOs first, then rings, KIQ state (SRIOV), MEC/RLC
 * buffers and the cached microcode.
 */
static int gfx_v8_0_sw_fini(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

	if (amdgpu_sriov_vf(adev)) {
		/* KIQ/MQD state only exists under SRIOV (see sw_init) */
		gfx_v8_0_compute_mqd_sw_fini(adev);
		gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
		gfx_v8_0_kiq_fini(adev);
	}

	gfx_v8_0_mec_fini(adev);
	gfx_v8_0_rlc_fini(adev);
	gfx_v8_0_free_microcode(adev);

	return 0;
}

/*
 * gfx_v8_0_tiling_mode_table_init - program the per-ASIC tiling tables.
 *
 * Builds the GB_TILE_MODE* and GB_MACROTILE_MODE* tables in
 * adev->gfx.config.tile_mode_array / macrotile_mode_array (both zeroed
 * first; indices left at 0 are intentionally unprogrammed entries) and
 * writes them to the hardware registers.  The values are fixed per
 * asic_type; do not reorder or "simplify" the field combinations.
 */
static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
	uint32_t *modearray, *mod2array;
	const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
	const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
	u32 reg_offset;

	modearray = adev->gfx.config.tile_mode_array;
	mod2array = adev->gfx.config.macrotile_mode_array;

	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		modearray[reg_offset] = 0;

	for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
		mod2array[reg_offset] = 0;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2274 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2275 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2276 PIPE_CONFIG(ADDR_SURF_P2) | 2277 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2279 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2280 PIPE_CONFIG(ADDR_SURF_P2) | 2281 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2282 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2283 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2284 PIPE_CONFIG(ADDR_SURF_P2) | 2285 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2286 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2287 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2288 PIPE_CONFIG(ADDR_SURF_P2)); 2289 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2290 PIPE_CONFIG(ADDR_SURF_P2) | 2291 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2293 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2294 PIPE_CONFIG(ADDR_SURF_P2) | 2295 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2297 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2298 PIPE_CONFIG(ADDR_SURF_P2) | 2299 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2301 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2302 PIPE_CONFIG(ADDR_SURF_P2) | 2303 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2304 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2305 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2306 PIPE_CONFIG(ADDR_SURF_P2) | 2307 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2308 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2309 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2310 PIPE_CONFIG(ADDR_SURF_P2) | 2311 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2312 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2313 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2314 
PIPE_CONFIG(ADDR_SURF_P2) | 2315 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2316 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2317 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2318 PIPE_CONFIG(ADDR_SURF_P2) | 2319 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2320 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2321 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2322 PIPE_CONFIG(ADDR_SURF_P2) | 2323 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2324 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2325 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2326 PIPE_CONFIG(ADDR_SURF_P2) | 2327 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2328 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2329 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2330 PIPE_CONFIG(ADDR_SURF_P2) | 2331 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2332 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2333 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2334 PIPE_CONFIG(ADDR_SURF_P2) | 2335 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2336 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2337 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2338 PIPE_CONFIG(ADDR_SURF_P2) | 2339 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2341 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2342 PIPE_CONFIG(ADDR_SURF_P2) | 2343 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2344 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2345 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2346 PIPE_CONFIG(ADDR_SURF_P2) | 2347 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2348 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2349 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2350 PIPE_CONFIG(ADDR_SURF_P2) | 2351 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2352 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2353 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2354 PIPE_CONFIG(ADDR_SURF_P2) | 2355 
MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2357 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2358 PIPE_CONFIG(ADDR_SURF_P2) | 2359 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2361 2362 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2363 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2364 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2365 NUM_BANKS(ADDR_SURF_8_BANK)); 2366 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2367 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2368 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2369 NUM_BANKS(ADDR_SURF_8_BANK)); 2370 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2371 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2372 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2373 NUM_BANKS(ADDR_SURF_8_BANK)); 2374 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2375 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2376 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2377 NUM_BANKS(ADDR_SURF_8_BANK)); 2378 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2379 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2380 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2381 NUM_BANKS(ADDR_SURF_8_BANK)); 2382 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2383 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2384 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2385 NUM_BANKS(ADDR_SURF_8_BANK)); 2386 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2387 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2388 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2389 NUM_BANKS(ADDR_SURF_8_BANK)); 2390 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2391 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2392 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2393 NUM_BANKS(ADDR_SURF_16_BANK)); 2394 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2395 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2396 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2397 NUM_BANKS(ADDR_SURF_16_BANK)); 2398 
mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2399 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2400 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2401 NUM_BANKS(ADDR_SURF_16_BANK)); 2402 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2403 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2404 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2405 NUM_BANKS(ADDR_SURF_16_BANK)); 2406 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2407 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2408 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2409 NUM_BANKS(ADDR_SURF_16_BANK)); 2410 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2411 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2412 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2413 NUM_BANKS(ADDR_SURF_16_BANK)); 2414 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2415 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2416 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2417 NUM_BANKS(ADDR_SURF_8_BANK)); 2418 2419 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2420 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2421 reg_offset != 23) 2422 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2423 2424 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2425 if (reg_offset != 7) 2426 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2427 2428 break; 2429 case CHIP_FIJI: 2430 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2431 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2432 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2433 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2434 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2435 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2436 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2437 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2438 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2439 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2440 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2441 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2442 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2443 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2446 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2447 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2448 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2450 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2452 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2454 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2456 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2458 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2459 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2460 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2461 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2462 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2463 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2464 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2465 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2466 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2467 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2468 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2469 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2470 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2471 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2472 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2473 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2474 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2476 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2477 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2478 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2480 
modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2481 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2482 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2484 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2485 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2486 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2488 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2489 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2490 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2492 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2493 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2496 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2497 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2500 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2501 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2504 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2505 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2506 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2508 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2509 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2512 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2513 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2516 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2517 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 
2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2520 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2521 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2522 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2524 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2525 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2526 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2528 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2529 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2530 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2532 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2533 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2534 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2536 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2537 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2538 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2540 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2541 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2542 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2544 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2545 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2546 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2548 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2549 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2550 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2552 2553 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2554 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2555 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2556 NUM_BANKS(ADDR_SURF_8_BANK)); 2557 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2558 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2559 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2560 NUM_BANKS(ADDR_SURF_8_BANK)); 2561 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2562 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2563 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2564 NUM_BANKS(ADDR_SURF_8_BANK)); 2565 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2568 NUM_BANKS(ADDR_SURF_8_BANK)); 2569 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2572 NUM_BANKS(ADDR_SURF_8_BANK)); 2573 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2576 NUM_BANKS(ADDR_SURF_8_BANK)); 2577 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2580 NUM_BANKS(ADDR_SURF_8_BANK)); 2581 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2584 NUM_BANKS(ADDR_SURF_8_BANK)); 2585 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2588 NUM_BANKS(ADDR_SURF_8_BANK)); 2589 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2592 NUM_BANKS(ADDR_SURF_8_BANK)); 2593 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2594 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2595 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2596 NUM_BANKS(ADDR_SURF_8_BANK)); 2597 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2600 NUM_BANKS(ADDR_SURF_8_BANK)); 2601 mod2array[13] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2602 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2603 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2604 NUM_BANKS(ADDR_SURF_8_BANK)); 2605 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2608 NUM_BANKS(ADDR_SURF_4_BANK)); 2609 2610 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2611 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2612 2613 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2614 if (reg_offset != 7) 2615 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2616 2617 break; 2618 case CHIP_TONGA: 2619 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2620 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2621 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2622 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2623 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2624 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2625 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2626 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2627 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2628 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2629 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2630 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2631 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2632 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2633 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2634 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2635 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2636 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2637 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2638 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2639 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2641 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2642 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2643 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) 
| 2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2645 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2646 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2647 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2648 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2649 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2650 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2651 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2652 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2653 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2654 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2655 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2656 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2657 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2658 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2659 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2660 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2661 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2662 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2663 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2664 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2665 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2666 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2667 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2668 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2669 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2670 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2671 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2672 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2673 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2674 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2675 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2676 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2677 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2678 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2679 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2680 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2681 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2682 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2683 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2684 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2685 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2686 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2687 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2688 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2689 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2690 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2691 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2692 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2693 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2694 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2695 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2696 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2697 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2698 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2699 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2700 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2701 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2702 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2703 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2704 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2705 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2706 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2707 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2708 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2709 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2710 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2711 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2713 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2714 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2715 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2717 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2718 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2719 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2721 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2722 
PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2723 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2725 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2726 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2727 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2729 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2730 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2731 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2732 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2733 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2734 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2735 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2737 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2738 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2739 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2741 2742 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2743 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2744 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2745 NUM_BANKS(ADDR_SURF_16_BANK)); 2746 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2747 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2748 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2749 NUM_BANKS(ADDR_SURF_16_BANK)); 2750 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2751 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2752 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2753 NUM_BANKS(ADDR_SURF_16_BANK)); 2754 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2755 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2756 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2757 NUM_BANKS(ADDR_SURF_16_BANK)); 2758 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2759 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2760 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2761 NUM_BANKS(ADDR_SURF_16_BANK)); 2762 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2763 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2764 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2765 NUM_BANKS(ADDR_SURF_16_BANK)); 2766 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2769 NUM_BANKS(ADDR_SURF_16_BANK)); 2770 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2771 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2772 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2773 NUM_BANKS(ADDR_SURF_16_BANK)); 2774 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2775 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2776 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2777 NUM_BANKS(ADDR_SURF_16_BANK)); 2778 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2781 NUM_BANKS(ADDR_SURF_16_BANK)); 2782 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2783 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2784 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2785 NUM_BANKS(ADDR_SURF_16_BANK)); 2786 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2789 NUM_BANKS(ADDR_SURF_8_BANK)); 2790 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2793 NUM_BANKS(ADDR_SURF_4_BANK)); 2794 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2795 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2796 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2797 NUM_BANKS(ADDR_SURF_4_BANK)); 2798 2799 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2800 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2801 2802 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2803 if (reg_offset != 7) 2804 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2805 2806 break; 2807 case 
CHIP_POLARIS11: 2808 case CHIP_POLARIS12: 2809 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2810 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2811 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2812 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2813 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2814 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2815 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2816 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2817 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2818 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2819 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2820 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2821 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2822 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2823 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2824 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2825 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2826 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2827 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2828 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2829 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2831 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2832 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2833 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2835 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2836 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2837 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2839 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2840 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2841 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2842 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2843 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2844 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2845 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2846 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2847 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2848 
PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2849 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2850 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2851 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2852 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2853 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2854 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2855 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2856 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2857 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2858 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2859 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2860 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2861 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2862 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2863 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2864 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2865 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2866 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2867 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2868 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2869 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2870 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2871 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2872 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2873 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2874 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2875 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2876 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2877 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2878 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2879 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2880 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2881 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2882 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2883 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2884 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2885 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2886 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2887 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2888 
PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2889 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2890 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2891 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2892 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2893 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2894 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2895 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2896 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2897 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2898 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2899 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2900 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2901 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2902 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2903 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2904 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2905 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2906 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2907 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2908 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2909 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2910 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2911 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2912 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2913 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2914 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2915 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2916 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2917 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2918 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2919 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2920 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2921 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2922 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2923 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2924 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2925 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2926 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2927 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 
2928 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2929 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2930 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2931 2932 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2933 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2934 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2935 NUM_BANKS(ADDR_SURF_16_BANK)); 2936 2937 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2938 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2939 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2940 NUM_BANKS(ADDR_SURF_16_BANK)); 2941 2942 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2943 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2944 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2945 NUM_BANKS(ADDR_SURF_16_BANK)); 2946 2947 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2948 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2949 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2950 NUM_BANKS(ADDR_SURF_16_BANK)); 2951 2952 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2953 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2954 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2955 NUM_BANKS(ADDR_SURF_16_BANK)); 2956 2957 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2958 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2959 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2960 NUM_BANKS(ADDR_SURF_16_BANK)); 2961 2962 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2963 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2964 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2965 NUM_BANKS(ADDR_SURF_16_BANK)); 2966 2967 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2968 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2969 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2970 NUM_BANKS(ADDR_SURF_16_BANK)); 2971 2972 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2973 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2974 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2975 NUM_BANKS(ADDR_SURF_16_BANK)); 2976 2977 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 
2979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2980 NUM_BANKS(ADDR_SURF_16_BANK)); 2981 2982 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2983 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2984 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2985 NUM_BANKS(ADDR_SURF_16_BANK)); 2986 2987 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2988 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2989 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2990 NUM_BANKS(ADDR_SURF_16_BANK)); 2991 2992 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2993 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2994 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2995 NUM_BANKS(ADDR_SURF_8_BANK)); 2996 2997 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3000 NUM_BANKS(ADDR_SURF_4_BANK)); 3001 3002 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3003 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3004 3005 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3006 if (reg_offset != 7) 3007 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3008 3009 break; 3010 case CHIP_POLARIS10: 3011 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3012 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3013 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3014 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3015 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3016 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3017 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3018 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3019 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3020 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3021 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3022 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3023 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3024 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3025 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3026 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3027 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3028 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3029 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3030 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3031 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3033 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3034 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3035 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3037 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3038 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3039 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3040 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3041 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3043 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 3045 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3046 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3047 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3048 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3049 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3050 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3051 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3052 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3053 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3054 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3055 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3057 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3058 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3059 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3060 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3061 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3062 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3063 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3064 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3065 
modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3066 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3067 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3068 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3069 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3071 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3073 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3075 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3077 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3078 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3079 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3081 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3082 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3083 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3085 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3086 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3087 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3089 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3090 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3091 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3093 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3094 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3095 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3097 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3098 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3099 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3101 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3102 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3103 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3104 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3105 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3106 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3107 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3109 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3110 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3111 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3113 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3114 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3115 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3117 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3118 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3119 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3121 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3122 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3123 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3125 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3126 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3127 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3129 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3130 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3131 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3133 3134 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3135 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3136 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3137 NUM_BANKS(ADDR_SURF_16_BANK)); 3138 3139 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3140 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3141 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3142 NUM_BANKS(ADDR_SURF_16_BANK)); 3143 3144 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3145 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3146 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3147 NUM_BANKS(ADDR_SURF_16_BANK)); 3148 3149 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3150 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3151 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3152 NUM_BANKS(ADDR_SURF_16_BANK)); 3153 3154 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3155 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3156 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3157 NUM_BANKS(ADDR_SURF_16_BANK)); 3158 3159 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3160 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3161 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3162 NUM_BANKS(ADDR_SURF_16_BANK)); 3163 3164 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3165 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3166 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3167 NUM_BANKS(ADDR_SURF_16_BANK)); 3168 3169 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3170 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3171 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3172 NUM_BANKS(ADDR_SURF_16_BANK)); 3173 3174 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3177 NUM_BANKS(ADDR_SURF_16_BANK)); 3178 3179 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3182 NUM_BANKS(ADDR_SURF_16_BANK)); 3183 3184 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3187 NUM_BANKS(ADDR_SURF_16_BANK)); 3188 3189 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3190 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3191 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3192 NUM_BANKS(ADDR_SURF_8_BANK)); 3193 3194 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3197 
NUM_BANKS(ADDR_SURF_4_BANK)); 3198 3199 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3200 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3201 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3202 NUM_BANKS(ADDR_SURF_4_BANK)); 3203 3204 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3205 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3206 3207 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3208 if (reg_offset != 7) 3209 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3210 3211 break; 3212 case CHIP_STONEY: 3213 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3214 PIPE_CONFIG(ADDR_SURF_P2) | 3215 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3216 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3217 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3218 PIPE_CONFIG(ADDR_SURF_P2) | 3219 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3220 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3221 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3222 PIPE_CONFIG(ADDR_SURF_P2) | 3223 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3224 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3225 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3226 PIPE_CONFIG(ADDR_SURF_P2) | 3227 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3228 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3229 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3230 PIPE_CONFIG(ADDR_SURF_P2) | 3231 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3232 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3233 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3234 PIPE_CONFIG(ADDR_SURF_P2) | 3235 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3236 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3237 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3238 PIPE_CONFIG(ADDR_SURF_P2) | 3239 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3240 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3241 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3242 
PIPE_CONFIG(ADDR_SURF_P2)); 3243 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3244 PIPE_CONFIG(ADDR_SURF_P2) | 3245 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3246 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3247 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3248 PIPE_CONFIG(ADDR_SURF_P2) | 3249 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3250 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3251 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3252 PIPE_CONFIG(ADDR_SURF_P2) | 3253 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3254 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3255 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3256 PIPE_CONFIG(ADDR_SURF_P2) | 3257 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3259 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3260 PIPE_CONFIG(ADDR_SURF_P2) | 3261 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3262 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3263 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3264 PIPE_CONFIG(ADDR_SURF_P2) | 3265 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3266 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3267 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3268 PIPE_CONFIG(ADDR_SURF_P2) | 3269 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3270 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3271 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3272 PIPE_CONFIG(ADDR_SURF_P2) | 3273 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3274 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3275 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3276 PIPE_CONFIG(ADDR_SURF_P2) | 3277 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3278 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3279 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3280 PIPE_CONFIG(ADDR_SURF_P2) | 3281 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3282 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3283 modearray[21] = 
(ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3284 PIPE_CONFIG(ADDR_SURF_P2) | 3285 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3287 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3288 PIPE_CONFIG(ADDR_SURF_P2) | 3289 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3291 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3292 PIPE_CONFIG(ADDR_SURF_P2) | 3293 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3295 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3296 PIPE_CONFIG(ADDR_SURF_P2) | 3297 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3299 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3300 PIPE_CONFIG(ADDR_SURF_P2) | 3301 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3303 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3304 PIPE_CONFIG(ADDR_SURF_P2) | 3305 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3307 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3308 PIPE_CONFIG(ADDR_SURF_P2) | 3309 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3311 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3312 PIPE_CONFIG(ADDR_SURF_P2) | 3313 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3315 3316 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3317 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3318 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3319 NUM_BANKS(ADDR_SURF_8_BANK)); 3320 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3321 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3322 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3323 NUM_BANKS(ADDR_SURF_8_BANK)); 3324 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3325 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3326 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3327 NUM_BANKS(ADDR_SURF_8_BANK)); 3328 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3329 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3330 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3331 NUM_BANKS(ADDR_SURF_8_BANK)); 3332 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3333 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3334 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3335 NUM_BANKS(ADDR_SURF_8_BANK)); 3336 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3337 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3338 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3339 NUM_BANKS(ADDR_SURF_8_BANK)); 3340 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3341 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3342 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3343 NUM_BANKS(ADDR_SURF_8_BANK)); 3344 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3345 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3346 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3347 NUM_BANKS(ADDR_SURF_16_BANK)); 3348 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3349 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3350 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3351 NUM_BANKS(ADDR_SURF_16_BANK)); 3352 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3353 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3354 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3355 NUM_BANKS(ADDR_SURF_16_BANK)); 3356 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3357 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3358 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3359 NUM_BANKS(ADDR_SURF_16_BANK)); 3360 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3361 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3362 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3363 NUM_BANKS(ADDR_SURF_16_BANK)); 3364 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3365 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3366 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3367 
NUM_BANKS(ADDR_SURF_16_BANK)); 3368 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3369 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3370 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3371 NUM_BANKS(ADDR_SURF_8_BANK)); 3372 3373 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3374 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3375 reg_offset != 23) 3376 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3377 3378 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3379 if (reg_offset != 7) 3380 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3381 3382 break; 3383 default: 3384 dev_warn(adev->dev, 3385 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3386 adev->asic_type); 3387 3388 case CHIP_CARRIZO: 3389 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3390 PIPE_CONFIG(ADDR_SURF_P2) | 3391 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3392 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3393 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3394 PIPE_CONFIG(ADDR_SURF_P2) | 3395 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3396 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3397 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3398 PIPE_CONFIG(ADDR_SURF_P2) | 3399 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3400 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3401 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3402 PIPE_CONFIG(ADDR_SURF_P2) | 3403 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3404 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3405 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3406 PIPE_CONFIG(ADDR_SURF_P2) | 3407 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3408 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3409 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3410 PIPE_CONFIG(ADDR_SURF_P2) | 3411 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3412 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3413 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3414 PIPE_CONFIG(ADDR_SURF_P2) | 3415 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3416 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3417 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3418 PIPE_CONFIG(ADDR_SURF_P2)); 3419 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3420 PIPE_CONFIG(ADDR_SURF_P2) | 3421 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3423 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3424 PIPE_CONFIG(ADDR_SURF_P2) | 3425 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3426 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3427 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3428 PIPE_CONFIG(ADDR_SURF_P2) | 3429 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3430 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3431 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3432 PIPE_CONFIG(ADDR_SURF_P2) | 3433 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3434 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3435 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3436 PIPE_CONFIG(ADDR_SURF_P2) | 3437 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3438 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3439 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3440 PIPE_CONFIG(ADDR_SURF_P2) | 3441 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3442 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3443 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3444 PIPE_CONFIG(ADDR_SURF_P2) | 3445 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3446 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3447 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3448 PIPE_CONFIG(ADDR_SURF_P2) | 3449 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3450 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3451 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3452 PIPE_CONFIG(ADDR_SURF_P2) | 3453 
MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3454 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3455 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3456 PIPE_CONFIG(ADDR_SURF_P2) | 3457 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3458 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3459 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3460 PIPE_CONFIG(ADDR_SURF_P2) | 3461 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3462 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3463 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3464 PIPE_CONFIG(ADDR_SURF_P2) | 3465 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3466 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3467 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3468 PIPE_CONFIG(ADDR_SURF_P2) | 3469 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3470 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3471 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3472 PIPE_CONFIG(ADDR_SURF_P2) | 3473 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3474 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3475 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3476 PIPE_CONFIG(ADDR_SURF_P2) | 3477 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3478 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3479 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3480 PIPE_CONFIG(ADDR_SURF_P2) | 3481 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3482 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3483 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3484 PIPE_CONFIG(ADDR_SURF_P2) | 3485 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3486 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3487 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3488 PIPE_CONFIG(ADDR_SURF_P2) | 3489 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3490 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3491 3492 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3493 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3494 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) 
| 3495 NUM_BANKS(ADDR_SURF_8_BANK)); 3496 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3497 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3498 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3499 NUM_BANKS(ADDR_SURF_8_BANK)); 3500 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3502 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3503 NUM_BANKS(ADDR_SURF_8_BANK)); 3504 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3505 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3506 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3507 NUM_BANKS(ADDR_SURF_8_BANK)); 3508 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3509 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3510 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3511 NUM_BANKS(ADDR_SURF_8_BANK)); 3512 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3515 NUM_BANKS(ADDR_SURF_8_BANK)); 3516 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3517 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3518 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3519 NUM_BANKS(ADDR_SURF_8_BANK)); 3520 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3521 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3522 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3523 NUM_BANKS(ADDR_SURF_16_BANK)); 3524 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3525 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3526 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3527 NUM_BANKS(ADDR_SURF_16_BANK)); 3528 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3529 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3530 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3531 NUM_BANKS(ADDR_SURF_16_BANK)); 3532 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3533 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3534 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3535 NUM_BANKS(ADDR_SURF_16_BANK)); 3536 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3537 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3538 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3539 NUM_BANKS(ADDR_SURF_16_BANK)); 3540 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3541 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3542 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3543 NUM_BANKS(ADDR_SURF_16_BANK)); 3544 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3545 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3546 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3547 NUM_BANKS(ADDR_SURF_8_BANK)); 3548 3549 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3550 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3551 reg_offset != 23) 3552 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3553 3554 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3555 if (reg_offset != 7) 3556 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3557 3558 break; 3559 } 3560 } 3561 3562 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3563 u32 se_num, u32 sh_num, u32 instance) 3564 { 3565 u32 data; 3566 3567 if (instance == 0xffffffff) 3568 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3569 else 3570 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3571 3572 if (se_num == 0xffffffff) 3573 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3574 else 3575 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3576 3577 if (sh_num == 0xffffffff) 3578 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3579 else 3580 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3581 3582 WREG32(mmGRBM_GFX_INDEX, data); 3583 } 3584 3585 static u32 gfx_v8_0_create_bitmask(u32 bit_width) 3586 { 3587 return (u32)((1ULL << bit_width) - 1); 3588 } 3589 3590 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3591 { 3592 u32 data, mask; 3593 3594 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3595 
RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3596 3597 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3598 3599 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / 3600 adev->gfx.config.max_sh_per_se); 3601 3602 return (~data) & mask; 3603 } 3604 3605 static void 3606 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3607 { 3608 switch (adev->asic_type) { 3609 case CHIP_FIJI: 3610 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3611 RB_XSEL2(1) | PKR_MAP(2) | 3612 PKR_XSEL(1) | PKR_YSEL(1) | 3613 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3614 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3615 SE_PAIR_YSEL(2); 3616 break; 3617 case CHIP_TONGA: 3618 case CHIP_POLARIS10: 3619 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3620 SE_XSEL(1) | SE_YSEL(1); 3621 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3622 SE_PAIR_YSEL(2); 3623 break; 3624 case CHIP_TOPAZ: 3625 case CHIP_CARRIZO: 3626 *rconf |= RB_MAP_PKR0(2); 3627 *rconf1 |= 0x0; 3628 break; 3629 case CHIP_POLARIS11: 3630 case CHIP_POLARIS12: 3631 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3632 SE_XSEL(1) | SE_YSEL(1); 3633 *rconf1 |= 0x0; 3634 break; 3635 case CHIP_STONEY: 3636 *rconf |= 0x0; 3637 *rconf1 |= 0x0; 3638 break; 3639 default: 3640 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3641 break; 3642 } 3643 } 3644 3645 static void 3646 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3647 u32 raster_config, u32 raster_config_1, 3648 unsigned rb_mask, unsigned num_rb) 3649 { 3650 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3651 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3652 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3653 unsigned rb_per_se = num_rb / num_se; 3654 unsigned se_mask[4]; 3655 unsigned se; 3656 3657 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3658 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3659 se_mask[2] = 
(se_mask[1] << rb_per_se) & rb_mask; 3660 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3661 3662 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3663 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3664 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3665 3666 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3667 (!se_mask[2] && !se_mask[3]))) { 3668 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3669 3670 if (!se_mask[0] && !se_mask[1]) { 3671 raster_config_1 |= 3672 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3673 } else { 3674 raster_config_1 |= 3675 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3676 } 3677 } 3678 3679 for (se = 0; se < num_se; se++) { 3680 unsigned raster_config_se = raster_config; 3681 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3682 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3683 int idx = (se / 2) * 2; 3684 3685 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3686 raster_config_se &= ~SE_MAP_MASK; 3687 3688 if (!se_mask[idx]) { 3689 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3690 } else { 3691 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3692 } 3693 } 3694 3695 pkr0_mask &= rb_mask; 3696 pkr1_mask &= rb_mask; 3697 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3698 raster_config_se &= ~PKR_MAP_MASK; 3699 3700 if (!pkr0_mask) { 3701 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3702 } else { 3703 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3704 } 3705 } 3706 3707 if (rb_per_se >= 2) { 3708 unsigned rb0_mask = 1 << (se * rb_per_se); 3709 unsigned rb1_mask = rb0_mask << 1; 3710 3711 rb0_mask &= rb_mask; 3712 rb1_mask &= rb_mask; 3713 if (!rb0_mask || !rb1_mask) { 3714 raster_config_se &= ~RB_MAP_PKR0_MASK; 3715 3716 if (!rb0_mask) { 3717 raster_config_se |= 3718 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3719 } else { 3720 raster_config_se |= 3721 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3722 } 3723 } 3724 3725 if (rb_per_se > 2) { 3726 rb0_mask = 1 << (se * rb_per_se + 
rb_per_pkr); 3727 rb1_mask = rb0_mask << 1; 3728 rb0_mask &= rb_mask; 3729 rb1_mask &= rb_mask; 3730 if (!rb0_mask || !rb1_mask) { 3731 raster_config_se &= ~RB_MAP_PKR1_MASK; 3732 3733 if (!rb0_mask) { 3734 raster_config_se |= 3735 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3736 } else { 3737 raster_config_se |= 3738 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); 3739 } 3740 } 3741 } 3742 } 3743 3744 /* GRBM_GFX_INDEX has a different offset on VI */ 3745 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); 3746 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); 3747 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3748 } 3749 3750 /* GRBM_GFX_INDEX has a different offset on VI */ 3751 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3752 } 3753 3754 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3755 { 3756 int i, j; 3757 u32 data; 3758 u32 raster_config = 0, raster_config_1 = 0; 3759 u32 active_rbs = 0; 3760 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3761 adev->gfx.config.max_sh_per_se; 3762 unsigned num_rb_pipes; 3763 3764 mutex_lock(&adev->grbm_idx_mutex); 3765 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3766 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3767 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3768 data = gfx_v8_0_get_rb_active_bitmap(adev); 3769 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 3770 rb_bitmap_width_per_sh); 3771 } 3772 } 3773 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3774 3775 adev->gfx.config.backend_enable_mask = active_rbs; 3776 adev->gfx.config.num_rbs = hweight32(active_rbs); 3777 3778 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * 3779 adev->gfx.config.max_shader_engines, 16); 3780 3781 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); 3782 3783 if (!adev->gfx.config.backend_enable_mask || 3784 adev->gfx.config.num_rbs >= num_rb_pipes) { 3785 WREG32(mmPA_SC_RASTER_CONFIG, raster_config); 
3786 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3787 } else { 3788 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1, 3789 adev->gfx.config.backend_enable_mask, 3790 num_rb_pipes); 3791 } 3792 3793 /* cache the values for userspace */ 3794 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3795 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3796 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3797 adev->gfx.config.rb_config[i][j].rb_backend_disable = 3798 RREG32(mmCC_RB_BACKEND_DISABLE); 3799 adev->gfx.config.rb_config[i][j].user_rb_backend_disable = 3800 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3801 adev->gfx.config.rb_config[i][j].raster_config = 3802 RREG32(mmPA_SC_RASTER_CONFIG); 3803 adev->gfx.config.rb_config[i][j].raster_config_1 = 3804 RREG32(mmPA_SC_RASTER_CONFIG_1); 3805 } 3806 } 3807 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3808 mutex_unlock(&adev->grbm_idx_mutex); 3809 } 3810 3811 /** 3812 * gfx_v8_0_init_compute_vmid - gart enable 3813 * 3814 * @rdev: amdgpu_device pointer 3815 * 3816 * Initialize compute vmid sh_mem registers 3817 * 3818 */ 3819 #define DEFAULT_SH_MEM_BASES (0x6000) 3820 #define FIRST_COMPUTE_VMID (8) 3821 #define LAST_COMPUTE_VMID (16) 3822 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) 3823 { 3824 int i; 3825 uint32_t sh_mem_config; 3826 uint32_t sh_mem_bases; 3827 3828 /* 3829 * Configure apertures: 3830 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 3831 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 3832 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 3833 */ 3834 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 3835 3836 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << 3837 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | 3838 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 3839 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | 3840 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 3841 SH_MEM_CONFIG__PRIVATE_ATC_MASK; 3842 3843 
mutex_lock(&adev->srbm_mutex); 3844 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 3845 vi_srbm_select(adev, 0, 0, 0, i); 3846 /* CP and shaders */ 3847 WREG32(mmSH_MEM_CONFIG, sh_mem_config); 3848 WREG32(mmSH_MEM_APE1_BASE, 1); 3849 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3850 WREG32(mmSH_MEM_BASES, sh_mem_bases); 3851 } 3852 vi_srbm_select(adev, 0, 0, 0, 0); 3853 mutex_unlock(&adev->srbm_mutex); 3854 } 3855 3856 static void gfx_v8_0_config_init(struct amdgpu_device *adev) 3857 { 3858 switch (adev->asic_type) { 3859 default: 3860 adev->gfx.config.double_offchip_lds_buf = 1; 3861 break; 3862 case CHIP_CARRIZO: 3863 case CHIP_STONEY: 3864 adev->gfx.config.double_offchip_lds_buf = 0; 3865 break; 3866 } 3867 } 3868 3869 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) 3870 { 3871 u32 tmp, sh_static_mem_cfg; 3872 int i; 3873 3874 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); 3875 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3876 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3877 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 3878 3879 gfx_v8_0_tiling_mode_table_init(adev); 3880 gfx_v8_0_setup_rb(adev); 3881 gfx_v8_0_get_cu_info(adev); 3882 gfx_v8_0_config_init(adev); 3883 3884 /* XXX SH_MEM regs */ 3885 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3886 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, 3887 SWIZZLE_ENABLE, 1); 3888 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3889 ELEMENT_SIZE, 1); 3890 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3891 INDEX_STRIDE, 3); 3892 mutex_lock(&adev->srbm_mutex); 3893 for (i = 0; i < adev->vm_manager.num_ids; i++) { 3894 vi_srbm_select(adev, 0, 0, 0, i); 3895 /* CP and shaders */ 3896 if (i == 0) { 3897 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 3898 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3899 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3900 
SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3901 WREG32(mmSH_MEM_CONFIG, tmp); 3902 WREG32(mmSH_MEM_BASES, 0); 3903 } else { 3904 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 3905 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3906 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3907 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3908 WREG32(mmSH_MEM_CONFIG, tmp); 3909 tmp = adev->mc.shared_aperture_start >> 48; 3910 WREG32(mmSH_MEM_BASES, tmp); 3911 } 3912 3913 WREG32(mmSH_MEM_APE1_BASE, 1); 3914 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3915 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); 3916 } 3917 vi_srbm_select(adev, 0, 0, 0, 0); 3918 mutex_unlock(&adev->srbm_mutex); 3919 3920 gfx_v8_0_init_compute_vmid(adev); 3921 3922 mutex_lock(&adev->grbm_idx_mutex); 3923 /* 3924 * making sure that the following register writes will be broadcasted 3925 * to all the shaders 3926 */ 3927 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3928 3929 WREG32(mmPA_SC_FIFO_SIZE, 3930 (adev->gfx.config.sc_prim_fifo_size_frontend << 3931 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 3932 (adev->gfx.config.sc_prim_fifo_size_backend << 3933 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 3934 (adev->gfx.config.sc_hiz_tile_fifo_size << 3935 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 3936 (adev->gfx.config.sc_earlyz_tile_fifo_size << 3937 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3938 3939 tmp = RREG32(mmSPI_ARB_PRIORITY); 3940 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); 3941 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); 3942 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); 3943 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); 3944 WREG32(mmSPI_ARB_PRIORITY, tmp); 3945 3946 mutex_unlock(&adev->grbm_idx_mutex); 3947 3948 } 3949 3950 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 3951 { 3952 u32 i, j, k; 3953 u32 mask; 3954 3955 
mutex_lock(&adev->grbm_idx_mutex); 3956 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3957 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3958 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3959 for (k = 0; k < adev->usec_timeout; k++) { 3960 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) 3961 break; 3962 udelay(1); 3963 } 3964 } 3965 } 3966 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3967 mutex_unlock(&adev->grbm_idx_mutex); 3968 3969 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 3970 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 3971 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 3972 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 3973 for (k = 0; k < adev->usec_timeout; k++) { 3974 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 3975 break; 3976 udelay(1); 3977 } 3978 } 3979 3980 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 3981 bool enable) 3982 { 3983 u32 tmp = RREG32(mmCP_INT_CNTL_RING0); 3984 3985 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 3986 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 3987 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 3988 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 
1 : 0); 3989 3990 WREG32(mmCP_INT_CNTL_RING0, tmp); 3991 } 3992 3993 static void gfx_v8_0_init_csb(struct amdgpu_device *adev) 3994 { 3995 /* csib */ 3996 WREG32(mmRLC_CSIB_ADDR_HI, 3997 adev->gfx.rlc.clear_state_gpu_addr >> 32); 3998 WREG32(mmRLC_CSIB_ADDR_LO, 3999 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 4000 WREG32(mmRLC_CSIB_LENGTH, 4001 adev->gfx.rlc.clear_state_size); 4002 } 4003 4004 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, 4005 int ind_offset, 4006 int list_size, 4007 int *unique_indices, 4008 int *indices_count, 4009 int max_indices, 4010 int *ind_start_offsets, 4011 int *offset_count, 4012 int max_offset) 4013 { 4014 int indices; 4015 bool new_entry = true; 4016 4017 for (; ind_offset < list_size; ind_offset++) { 4018 4019 if (new_entry) { 4020 new_entry = false; 4021 ind_start_offsets[*offset_count] = ind_offset; 4022 *offset_count = *offset_count + 1; 4023 BUG_ON(*offset_count >= max_offset); 4024 } 4025 4026 if (register_list_format[ind_offset] == 0xFFFFFFFF) { 4027 new_entry = true; 4028 continue; 4029 } 4030 4031 ind_offset += 2; 4032 4033 /* look for the matching indice */ 4034 for (indices = 0; 4035 indices < *indices_count; 4036 indices++) { 4037 if (unique_indices[indices] == 4038 register_list_format[ind_offset]) 4039 break; 4040 } 4041 4042 if (indices >= *indices_count) { 4043 unique_indices[*indices_count] = 4044 register_list_format[ind_offset]; 4045 indices = *indices_count; 4046 *indices_count = *indices_count + 1; 4047 BUG_ON(*indices_count >= max_indices); 4048 } 4049 4050 register_list_format[ind_offset] = indices; 4051 } 4052 } 4053 4054 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) 4055 { 4056 int i, temp, data; 4057 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; 4058 int indices_count = 0; 4059 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 4060 int offset_count = 0; 4061 4062 int list_size; 4063 unsigned int *register_list_format = 4064 
kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 4065 if (!register_list_format) 4066 return -ENOMEM; 4067 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 4068 adev->gfx.rlc.reg_list_format_size_bytes); 4069 4070 gfx_v8_0_parse_ind_reg_list(register_list_format, 4071 RLC_FormatDirectRegListLength, 4072 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 4073 unique_indices, 4074 &indices_count, 4075 sizeof(unique_indices) / sizeof(int), 4076 indirect_start_offsets, 4077 &offset_count, 4078 sizeof(indirect_start_offsets)/sizeof(int)); 4079 4080 /* save and restore list */ 4081 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1); 4082 4083 WREG32(mmRLC_SRM_ARAM_ADDR, 0); 4084 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 4085 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); 4086 4087 /* indirect list */ 4088 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); 4089 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) 4090 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); 4091 4092 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 4093 list_size = list_size >> 1; 4094 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); 4095 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); 4096 4097 /* starting offsets starts */ 4098 WREG32(mmRLC_GPM_SCRATCH_ADDR, 4099 adev->gfx.rlc.starting_offsets_start); 4100 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) 4101 WREG32(mmRLC_GPM_SCRATCH_DATA, 4102 indirect_start_offsets[i]); 4103 4104 /* unique indices */ 4105 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; 4106 data = mmRLC_SRM_INDEX_CNTL_DATA_0; 4107 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { 4108 if (unique_indices[i] != 0) { 4109 WREG32(temp + i, unique_indices[i] & 0x3FFFF); 4110 WREG32(data + i, unique_indices[i] >> 20); 4111 } 4112 } 4113 kfree(register_list_format); 4114 4115 return 0; 4116 } 4117 4118 static void 
gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) 4119 { 4120 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1); 4121 } 4122 4123 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev) 4124 { 4125 uint32_t data; 4126 4127 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60); 4128 4129 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10); 4130 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10); 4131 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10); 4132 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10); 4133 WREG32(mmRLC_PG_DELAY, data); 4134 4135 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3); 4136 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0); 4137 4138 } 4139 4140 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 4141 bool enable) 4142 { 4143 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0); 4144 } 4145 4146 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 4147 bool enable) 4148 { 4149 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0); 4150 } 4151 4152 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) 4153 { 4154 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 
0 : 1); 4155 } 4156 4157 static void gfx_v8_0_init_pg(struct amdgpu_device *adev) 4158 { 4159 if ((adev->asic_type == CHIP_CARRIZO) || 4160 (adev->asic_type == CHIP_STONEY)) { 4161 gfx_v8_0_init_csb(adev); 4162 gfx_v8_0_init_save_restore_list(adev); 4163 gfx_v8_0_enable_save_restore_machine(adev); 4164 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); 4165 gfx_v8_0_init_power_gating(adev); 4166 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); 4167 } else if ((adev->asic_type == CHIP_POLARIS11) || 4168 (adev->asic_type == CHIP_POLARIS12)) { 4169 gfx_v8_0_init_csb(adev); 4170 gfx_v8_0_init_save_restore_list(adev); 4171 gfx_v8_0_enable_save_restore_machine(adev); 4172 gfx_v8_0_init_power_gating(adev); 4173 } 4174 4175 } 4176 4177 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) 4178 { 4179 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0); 4180 4181 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4182 gfx_v8_0_wait_for_rlc_serdes(adev); 4183 } 4184 4185 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev) 4186 { 4187 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4188 udelay(50); 4189 4190 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 4191 udelay(50); 4192 } 4193 4194 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) 4195 { 4196 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1); 4197 4198 /* carrizo do enable cp interrupt after cp inited */ 4199 if (!(adev->flags & AMD_IS_APU)) 4200 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4201 4202 udelay(50); 4203 } 4204 4205 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) 4206 { 4207 const struct rlc_firmware_header_v2_0 *hdr; 4208 const __le32 *fw_data; 4209 unsigned i, fw_size; 4210 4211 if (!adev->gfx.rlc_fw) 4212 return -EINVAL; 4213 4214 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 4215 amdgpu_ucode_print_rlc_hdr(&hdr->header); 4216 4217 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 4218 
			le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	/* write address 0, stream all dwords, then latch the fw version */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}

/*
 * Full RLC bring-up sequence: stop, disable CG/PG, soft-reset, init
 * power gating, load microcode (legacy path or via SMU), then start.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* also clear the 3D CGCG/CGLS enable bits on Polaris */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the fw; just wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
					AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}

/*
 * Halt or un-halt the gfx CP micro engines (ME/PFP/CE).  On halt, mark
 * every gfx ring not-ready so nothing submits to a stopped CP.
 */
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
	int i;
	u32 tmp = RREG32(mmCP_ME_CNTL);

	if (enable) {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
	} else {
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	}
	WREG32(mmCP_ME_CNTL, tmp);
	udelay(50);
}

/*
 * Legacy load of the three gfx CP firmwares (PFP, CE, ME) through their
 * respective ucode data ports.  The CP is halted first.
 * Returns 0 on success, -EINVAL if any firmware is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}

/*
 * Compute the number of ring dwords needed to emit the full clear-state
 * sequence built by gfx_v8_0_cp_gfx_start().  Returns 0 if vi_cs_data
 * contains a section type other than SECT_CONTEXT.
 */
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
	u32 count = 0;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	count += 2;
	/* context control state */
	count += 3;

	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;
			else
				return 0;
		}
	}
	/* pa_sc_raster_config/pa_sc_raster_config1 */
	count += 4;
	/* end clear state */
	count += 2;
	/* clear state */
	count += 2;

	return count;
}

/*
 * Un-halt the gfx CP and emit the initial clear-state / context-control
 * packet stream on gfx ring 0, including per-ASIC PA_SC_RASTER_CONFIG
 * values and the CE partition bases.
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every SECT_CONTEXT register extent from the clear-state table */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC golden raster config pair */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				  0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}

/*
 * Program gfx ring 0's ring buffer registers (size, pointers, writeback
 * addresses, doorbell) and start the ring with a ring test.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR,
	       lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* drop the RPTR_WR enable bit again now that the pointers are set */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	amdgpu_ring_clear_ring(ring);
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}

/*
 * Halt or un-halt both compute micro engines (MEC1/MEC2).  On halt,
 * mark every compute ring and the KIQ ring not-ready.
 */
static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
	int i;

	if (enable) {
		WREG32(mmCP_MEC_CNTL, 0);
	} else {
		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
		for (i = 0; i < adev->gfx.num_compute_rings; i++)
			adev->gfx.compute_ring[i].ready = false;
		adev->gfx.kiq.ring.ready = false;
	}
	udelay(50);
}

/*
 * Legacy load of the MEC (compute) microcode.  MEC2 firmware is loaded
 * only when present.  Returns 0 on success, -EINVAL if MEC1 firmware
 * is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}

/* Tear down the per-ring MQD buffer objects for all compute rings. */
static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
{
	int i, r;

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj) {
			r = amdgpu_bo_reserve(ring->mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);

			amdgpu_bo_unpin(ring->mqd_obj);
			amdgpu_bo_unreserve(ring->mqd_obj);

			amdgpu_bo_unref(&ring->mqd_obj);
			ring->mqd_obj = NULL;
			ring->mqd_ptr = NULL;
			ring->mqd_gpu_addr = 0;
		}
	}
}

/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	/* second write sets the "queue active" bit after the id is latched */
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}

/* Emit a SET_RESOURCES packet on the KIQ ring to claim queues 0-7. */
static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
{
	amdgpu_ring_alloc(ring, 8);
	/* set resources */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
	amdgpu_ring_write(ring, 0);	/* queue mask hi */
	amdgpu_ring_write(ring, 0);	/* gws mask lo */
	amdgpu_ring_write(ring, 0);	/* gws mask hi */
	amdgpu_ring_write(ring, 0);	/* oac mask */
	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
	amdgpu_ring_commit(ring);
	udelay(50);
}

/*
 * Ask the KIQ to map a compute queue: emit a MAP_QUEUES packet carrying
 * the queue's doorbell, MQD address and wptr writeback address.
 */
static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
				      struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = kiq_ring->adev;
	uint64_t mqd_addr, wptr_addr;

	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	amdgpu_ring_alloc(kiq_ring, 8);

	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
	amdgpu_ring_write(kiq_ring, 0x21010000);
	amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
			  (ring->queue << 26) |
			  (ring->pipe << 29) |
			  ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
	amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
	amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
	amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
	amdgpu_ring_commit(kiq_ring);
	udelay(50);
}

/*
 * Fill the ring's memory queue descriptor (vi_mqd) with the HQD
 * register values the CP will load when the queue is mapped.  Only
 * writes the MQD in memory; gfx_v8_0_kiq_init_register() pushes the
 * same values into the registers.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	/* enable all CUs on every shader engine */
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	eop_base_addr = ring->eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
			    CP_HQD_PQ_DOORBELL_CONTROL,
			    DOORBELL_EN,
			    ring->use_doorbell ? 1 : 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}

/*
 * Push the MQD contents into the HQD registers for the queue currently
 * selected via SRBM.  Caller must hold srbm_mutex with the right
 * me/pipe/queue selected.  Register write order follows the hardware
 * dequeue/re-program sequence and must not be rearranged.
 */
static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;
	int j;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active */
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		/* poll (bounded by usec_timeout) until the HQD goes inactive */
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
	}

	/* set the pointer to the MQD */
	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
				mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
				mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		if ((adev->asic_type == CHIP_CARRIZO) ||
				(adev->asic_type == CHIP_FIJI) ||
				(adev->asic_type == CHIP_STONEY)) {
			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				AMDGPU_DOORBELL_KIQ << 2);
			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				AMDGPU_DOORBELL_MEC_RING7 << 2);
		}
	}

	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

	/* set the vmid for the queue */
	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	if (ring->use_doorbell)
		WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);

	return 0;
}

/*
 * Initialize a KIQ or compute queue: build (or restore, on GPU reset)
 * the MQD under srbm_mutex, then map the queue — directly for the KIQ,
 * or via a KIQ MAP_QUEUES packet for regular compute queues.
 */
static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct vi_mqd *mqd = ring->mqd_ptr;
	bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ);
	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;

	if (is_kiq) {
		gfx_v8_0_kiq_setting(&kiq->ring);
	} else {
		/* compute rings back up their MQD at their array index */
		mqd_idx = ring - &adev->gfx.compute_ring[0];
	}

	if (!adev->gfx.in_reset) {
		memset((void *)mqd, 0, sizeof(*mqd));
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		if (is_kiq)
			gfx_v8_0_kiq_init_register(ring);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
	} else { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.mec.mqd_backup[mqd_idx])
			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);

		if (is_kiq) {
			mutex_lock(&adev->srbm_mutex);
			vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
			gfx_v8_0_kiq_init_register(ring);
			vi_srbm_select(adev, 0, 0, 0, 0);
			mutex_unlock(&adev->srbm_mutex);
		}
	}

	if (is_kiq)
		gfx_v8_0_kiq_enable(ring);
	else
		gfx_v8_0_map_queue_enable(&kiq->ring, ring);

	return 0;
}

/*
 * Resume the KIQ and all compute rings: enable the MEC, initialize the
 * KIQ queue first (it maps the others), then each compute queue, ring-
 * testing every one.  Returns 0 on success or the first error seen.
 */
static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = NULL;
	int r = 0, i;

	gfx_v8_0_cp_compute_enable(adev, true);

	ring = &adev->gfx.kiq.ring;

	r = amdgpu_bo_reserve(ring->mqd_obj, false);
	if (unlikely(r != 0))
		goto done;

	r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
	if (!r) {
		r = gfx_v8_0_kiq_init_queue(ring);
		amdgpu_bo_kunmap(ring->mqd_obj);
		ring->mqd_ptr = NULL;
	}
	amdgpu_bo_unreserve(ring->mqd_obj);
	if (r)
		goto done;

	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		goto done;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		ring = &adev->gfx.compute_ring[i];

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0))
			goto done;
		r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr);
		if (!r) {
			r = gfx_v8_0_kiq_init_queue(ring);
			amdgpu_bo_kunmap(ring->mqd_obj);
			ring->mqd_ptr = NULL;
		}
		amdgpu_bo_unreserve(ring->mqd_obj);
		if (r)
			goto done;

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

done:
	return r;
}

/*
 * Non-KIQ compute bring-up path: allocate/pin an MQD per compute ring
 * and program each HQD directly (body continues below).
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues.
*/ 5018 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5019 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5020 5021 if (ring->mqd_obj == NULL) { 5022 r = amdgpu_bo_create(adev, 5023 sizeof(struct vi_mqd), 5024 PAGE_SIZE, true, 5025 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, 5026 NULL, &ring->mqd_obj); 5027 if (r) { 5028 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); 5029 return r; 5030 } 5031 } 5032 5033 r = amdgpu_bo_reserve(ring->mqd_obj, false); 5034 if (unlikely(r != 0)) { 5035 gfx_v8_0_cp_compute_fini(adev); 5036 return r; 5037 } 5038 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, 5039 &mqd_gpu_addr); 5040 if (r) { 5041 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); 5042 gfx_v8_0_cp_compute_fini(adev); 5043 return r; 5044 } 5045 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); 5046 if (r) { 5047 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); 5048 gfx_v8_0_cp_compute_fini(adev); 5049 return r; 5050 } 5051 5052 /* init the mqd struct */ 5053 memset(buf, 0, sizeof(struct vi_mqd)); 5054 5055 mqd = (struct vi_mqd *)buf; 5056 mqd->header = 0xC0310800; 5057 mqd->compute_pipelinestat_enable = 0x00000001; 5058 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 5059 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 5060 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 5061 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 5062 mqd->compute_misc_reserved = 0x00000003; 5063 5064 mutex_lock(&adev->srbm_mutex); 5065 vi_srbm_select(adev, ring->me, 5066 ring->pipe, 5067 ring->queue, 0); 5068 5069 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); 5070 eop_gpu_addr >>= 8; 5071 5072 /* write the EOP addr */ 5073 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); 5074 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); 5075 5076 /* set the VMID assigned */ 5077 WREG32(mmCP_HQD_VMID, 0); 5078 5079 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 5080 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 5081 tmp = 
REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 5082 (order_base_2(MEC_HPD_SIZE / 4) - 1)); 5083 WREG32(mmCP_HQD_EOP_CONTROL, tmp); 5084 5085 /* disable wptr polling */ 5086 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); 5087 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); 5088 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); 5089 5090 mqd->cp_hqd_eop_base_addr_lo = 5091 RREG32(mmCP_HQD_EOP_BASE_ADDR); 5092 mqd->cp_hqd_eop_base_addr_hi = 5093 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); 5094 5095 /* enable doorbell? */ 5096 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 5097 if (use_doorbell) { 5098 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 5099 } else { 5100 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); 5101 } 5102 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); 5103 mqd->cp_hqd_pq_doorbell_control = tmp; 5104 5105 /* disable the queue if it's active */ 5106 mqd->cp_hqd_dequeue_request = 0; 5107 mqd->cp_hqd_pq_rptr = 0; 5108 mqd->cp_hqd_pq_wptr= 0; 5109 if (RREG32(mmCP_HQD_ACTIVE) & 1) { 5110 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); 5111 for (j = 0; j < adev->usec_timeout; j++) { 5112 if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) 5113 break; 5114 udelay(1); 5115 } 5116 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); 5117 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); 5118 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 5119 } 5120 5121 /* set the pointer to the MQD */ 5122 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; 5123 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); 5124 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); 5125 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); 5126 5127 /* set MQD vmid to 0 */ 5128 tmp = RREG32(mmCP_MQD_CONTROL); 5129 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 5130 WREG32(mmCP_MQD_CONTROL, tmp); 5131 mqd->cp_mqd_control = tmp; 5132 5133 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 5134 hqd_gpu_addr = ring->gpu_addr >> 8; 5135 mqd->cp_hqd_pq_base_lo = 
hqd_gpu_addr; 5136 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 5137 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); 5138 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); 5139 5140 /* set up the HQD, this is similar to CP_RB0_CNTL */ 5141 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 5142 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 5143 (order_base_2(ring->ring_size / 4) - 1)); 5144 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 5145 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 5146 #ifdef __BIG_ENDIAN 5147 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 5148 #endif 5149 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 5150 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 5151 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 5152 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 5153 WREG32(mmCP_HQD_PQ_CONTROL, tmp); 5154 mqd->cp_hqd_pq_control = tmp; 5155 5156 /* set the wb address wether it's enabled or not */ 5157 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 5158 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 5159 mqd->cp_hqd_pq_rptr_report_addr_hi = 5160 upper_32_bits(wb_gpu_addr) & 0xffff; 5161 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, 5162 mqd->cp_hqd_pq_rptr_report_addr_lo); 5163 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, 5164 mqd->cp_hqd_pq_rptr_report_addr_hi); 5165 5166 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 5167 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 5168 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 5169 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 5170 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); 5171 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, 5172 mqd->cp_hqd_pq_wptr_poll_addr_hi); 5173 5174 /* enable the doorbell if requested */ 5175 if (use_doorbell) { 5176 if ((adev->asic_type == CHIP_CARRIZO) || 5177 (adev->asic_type 
== CHIP_FIJI) || 5178 (adev->asic_type == CHIP_STONEY) || 5179 (adev->asic_type == CHIP_POLARIS11) || 5180 (adev->asic_type == CHIP_POLARIS10) || 5181 (adev->asic_type == CHIP_POLARIS12)) { 5182 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, 5183 AMDGPU_DOORBELL_KIQ << 2); 5184 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, 5185 AMDGPU_DOORBELL_MEC_RING7 << 2); 5186 } 5187 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 5188 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 5189 DOORBELL_OFFSET, ring->doorbell_index); 5190 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); 5191 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); 5192 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); 5193 mqd->cp_hqd_pq_doorbell_control = tmp; 5194 5195 } else { 5196 mqd->cp_hqd_pq_doorbell_control = 0; 5197 } 5198 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 5199 mqd->cp_hqd_pq_doorbell_control); 5200 5201 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 5202 ring->wptr = 0; 5203 mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); 5204 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); 5205 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 5206 5207 /* set the vmid for the queue */ 5208 mqd->cp_hqd_vmid = 0; 5209 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); 5210 5211 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 5212 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 5213 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); 5214 mqd->cp_hqd_persistent_state = tmp; 5215 if (adev->asic_type == CHIP_STONEY || 5216 adev->asic_type == CHIP_POLARIS11 || 5217 adev->asic_type == CHIP_POLARIS10 || 5218 adev->asic_type == CHIP_POLARIS12) { 5219 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); 5220 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); 5221 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); 5222 } 5223 5224 /* activate the queue */ 5225 mqd->cp_hqd_active = 1; 5226 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); 5227 5228 vi_srbm_select(adev, 
0, 0, 0, 0); 5229 mutex_unlock(&adev->srbm_mutex); 5230 5231 amdgpu_bo_kunmap(ring->mqd_obj); 5232 amdgpu_bo_unreserve(ring->mqd_obj); 5233 } 5234 5235 if (use_doorbell) { 5236 tmp = RREG32(mmCP_PQ_STATUS); 5237 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); 5238 WREG32(mmCP_PQ_STATUS, tmp); 5239 } 5240 5241 gfx_v8_0_cp_compute_enable(adev, true); 5242 5243 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5244 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5245 5246 ring->ready = true; 5247 r = amdgpu_ring_test_ring(ring); 5248 if (r) 5249 ring->ready = false; 5250 } 5251 5252 return 0; 5253 } 5254 5255 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 5256 { 5257 int r; 5258 5259 if (!(adev->flags & AMD_IS_APU)) 5260 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5261 5262 if (!adev->pp_enabled) { 5263 if (adev->firmware.load_type != AMDGPU_FW_LOAD_SMU) { 5264 /* legacy firmware loading */ 5265 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5266 if (r) 5267 return r; 5268 5269 r = gfx_v8_0_cp_compute_load_microcode(adev); 5270 if (r) 5271 return r; 5272 } else { 5273 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5274 AMDGPU_UCODE_ID_CP_CE); 5275 if (r) 5276 return -EINVAL; 5277 5278 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5279 AMDGPU_UCODE_ID_CP_PFP); 5280 if (r) 5281 return -EINVAL; 5282 5283 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5284 AMDGPU_UCODE_ID_CP_ME); 5285 if (r) 5286 return -EINVAL; 5287 5288 if (adev->asic_type == CHIP_TOPAZ) { 5289 r = gfx_v8_0_cp_compute_load_microcode(adev); 5290 if (r) 5291 return r; 5292 } else { 5293 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev, 5294 AMDGPU_UCODE_ID_CP_MEC1); 5295 if (r) 5296 return -EINVAL; 5297 } 5298 } 5299 } 5300 5301 r = gfx_v8_0_cp_gfx_resume(adev); 5302 if (r) 5303 return r; 5304 5305 if (amdgpu_sriov_vf(adev)) 5306 r = gfx_v8_0_kiq_resume(adev); 5307 else 5308 r = gfx_v8_0_cp_compute_resume(adev); 5309 if (r) 5310 return r; 5311 
5312 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5313 5314 return 0; 5315 } 5316 5317 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5318 { 5319 gfx_v8_0_cp_gfx_enable(adev, enable); 5320 gfx_v8_0_cp_compute_enable(adev, enable); 5321 } 5322 5323 static int gfx_v8_0_hw_init(void *handle) 5324 { 5325 int r; 5326 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5327 5328 gfx_v8_0_init_golden_registers(adev); 5329 gfx_v8_0_gpu_init(adev); 5330 5331 r = gfx_v8_0_rlc_resume(adev); 5332 if (r) 5333 return r; 5334 5335 r = gfx_v8_0_cp_resume(adev); 5336 5337 return r; 5338 } 5339 5340 static int gfx_v8_0_hw_fini(void *handle) 5341 { 5342 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5343 5344 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5345 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5346 if (amdgpu_sriov_vf(adev)) { 5347 pr_debug("For SRIOV client, shouldn't do anything.\n"); 5348 return 0; 5349 } 5350 gfx_v8_0_cp_enable(adev, false); 5351 gfx_v8_0_rlc_stop(adev); 5352 gfx_v8_0_cp_compute_fini(adev); 5353 5354 amdgpu_set_powergating_state(adev, 5355 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); 5356 5357 return 0; 5358 } 5359 5360 static int gfx_v8_0_suspend(void *handle) 5361 { 5362 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5363 5364 return gfx_v8_0_hw_fini(adev); 5365 } 5366 5367 static int gfx_v8_0_resume(void *handle) 5368 { 5369 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5370 5371 return gfx_v8_0_hw_init(adev); 5372 } 5373 5374 static bool gfx_v8_0_is_idle(void *handle) 5375 { 5376 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5377 5378 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5379 return false; 5380 else 5381 return true; 5382 } 5383 5384 static int gfx_v8_0_wait_for_idle(void *handle) 5385 { 5386 unsigned i; 5387 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5388 5389 for (i = 0; i < adev->usec_timeout; 
i++) { 5390 if (gfx_v8_0_is_idle(handle)) 5391 return 0; 5392 5393 udelay(1); 5394 } 5395 return -ETIMEDOUT; 5396 } 5397 5398 static bool gfx_v8_0_check_soft_reset(void *handle) 5399 { 5400 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5401 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5402 u32 tmp; 5403 5404 /* GRBM_STATUS */ 5405 tmp = RREG32(mmGRBM_STATUS); 5406 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5407 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5408 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5409 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5410 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5411 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5412 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5413 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5414 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5415 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5416 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5417 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5418 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5419 } 5420 5421 /* GRBM_STATUS2 */ 5422 tmp = RREG32(mmGRBM_STATUS2); 5423 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5424 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5425 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5426 5427 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5428 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5429 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5430 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5431 SOFT_RESET_CPF, 1); 5432 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5433 SOFT_RESET_CPC, 1); 5434 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5435 SOFT_RESET_CPG, 1); 5436 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5437 SOFT_RESET_GRBM, 1); 5438 } 5439 5440 /* SRBM_STATUS */ 5441 tmp = RREG32(mmSRBM_STATUS); 5442 if (REG_GET_FIELD(tmp, 
SRBM_STATUS, GRBM_RQ_PENDING)) 5443 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5444 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5445 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5446 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5447 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5448 5449 if (grbm_soft_reset || srbm_soft_reset) { 5450 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5451 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5452 return true; 5453 } else { 5454 adev->gfx.grbm_soft_reset = 0; 5455 adev->gfx.srbm_soft_reset = 0; 5456 return false; 5457 } 5458 } 5459 5460 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, 5461 struct amdgpu_ring *ring) 5462 { 5463 int i; 5464 5465 mutex_lock(&adev->srbm_mutex); 5466 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5467 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 5468 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2); 5469 for (i = 0; i < adev->usec_timeout; i++) { 5470 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 5471 break; 5472 udelay(1); 5473 } 5474 } 5475 vi_srbm_select(adev, 0, 0, 0, 0); 5476 mutex_unlock(&adev->srbm_mutex); 5477 } 5478 5479 static int gfx_v8_0_pre_soft_reset(void *handle) 5480 { 5481 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5482 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5483 5484 if ((!adev->gfx.grbm_soft_reset) && 5485 (!adev->gfx.srbm_soft_reset)) 5486 return 0; 5487 5488 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5489 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5490 5491 /* stop the rlc */ 5492 gfx_v8_0_rlc_stop(adev); 5493 5494 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5495 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5496 /* Disable GFX parsing/prefetching */ 5497 gfx_v8_0_cp_gfx_enable(adev, false); 5498 5499 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5500 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5501 
/**
 * gfx_v8_0_soft_reset - execute the soft reset computed earlier
 *
 * @handle: amdgpu_device pointer (opaque IP-block handle)
 *
 * Applies the GRBM/SRBM soft-reset masks cached by
 * gfx_v8_0_check_soft_reset().  No-op when neither mask is set.
 * Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* nothing to reset if check_soft_reset() found no hung engine */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall/clear the GFX path in GMCON before pulling reset */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	if (grbm_soft_reset) {
		/* assert the GRBM reset bits; the read-back posts the write */
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		/* de-assert */
		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/de-assert sequence on the SRBM side */
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GMCON stall now that reset is de-asserted */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5632 */ 5633 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5634 { 5635 uint64_t clock; 5636 5637 mutex_lock(&adev->gfx.gpu_clock_mutex); 5638 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5639 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5640 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5641 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5642 return clock; 5643 } 5644 5645 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5646 uint32_t vmid, 5647 uint32_t gds_base, uint32_t gds_size, 5648 uint32_t gws_base, uint32_t gws_size, 5649 uint32_t oa_base, uint32_t oa_size) 5650 { 5651 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5652 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5653 5654 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5655 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5656 5657 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5658 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5659 5660 /* GDS Base */ 5661 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5662 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5663 WRITE_DATA_DST_SEL(0))); 5664 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5665 amdgpu_ring_write(ring, 0); 5666 amdgpu_ring_write(ring, gds_base); 5667 5668 /* GDS Size */ 5669 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5670 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5671 WRITE_DATA_DST_SEL(0))); 5672 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5673 amdgpu_ring_write(ring, 0); 5674 amdgpu_ring_write(ring, gds_size); 5675 5676 /* GWS */ 5677 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5678 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5679 WRITE_DATA_DST_SEL(0))); 5680 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5681 amdgpu_ring_write(ring, 0); 5682 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5683 5684 /* OA */ 5685 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5686 
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5687 WRITE_DATA_DST_SEL(0))); 5688 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5689 amdgpu_ring_write(ring, 0); 5690 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5691 } 5692 5693 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5694 { 5695 WREG32(mmSQ_IND_INDEX, 5696 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5697 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5698 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5699 (SQ_IND_INDEX__FORCE_READ_MASK)); 5700 return RREG32(mmSQ_IND_DATA); 5701 } 5702 5703 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5704 uint32_t wave, uint32_t thread, 5705 uint32_t regno, uint32_t num, uint32_t *out) 5706 { 5707 WREG32(mmSQ_IND_INDEX, 5708 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5709 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5710 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5711 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5712 (SQ_IND_INDEX__FORCE_READ_MASK) | 5713 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5714 while (num--) 5715 *(out++) = RREG32(mmSQ_IND_DATA); 5716 } 5717 5718 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5719 { 5720 /* type 0 wave data */ 5721 dst[(*no_fields)++] = 0; 5722 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5723 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5724 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5725 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5726 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5727 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5728 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5729 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5730 
dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5731 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5732 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5733 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5734 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5735 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5736 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5737 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5738 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5739 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5740 } 5741 5742 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5743 uint32_t wave, uint32_t start, 5744 uint32_t size, uint32_t *dst) 5745 { 5746 wave_read_regs( 5747 adev, simd, wave, 0, 5748 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5749 } 5750 5751 5752 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5753 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5754 .select_se_sh = &gfx_v8_0_select_se_sh, 5755 .read_wave_data = &gfx_v8_0_read_wave_data, 5756 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5757 }; 5758 5759 static int gfx_v8_0_early_init(void *handle) 5760 { 5761 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5762 5763 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5764 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; 5765 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5766 gfx_v8_0_set_ring_funcs(adev); 5767 gfx_v8_0_set_irq_funcs(adev); 5768 gfx_v8_0_set_gds_init(adev); 5769 gfx_v8_0_set_rlc_funcs(adev); 5770 5771 return 0; 5772 } 5773 5774 static int gfx_v8_0_late_init(void *handle) 5775 { 5776 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5777 int r; 5778 5779 r = amdgpu_irq_get(adev, 
&adev->gfx.priv_reg_irq, 0); 5780 if (r) 5781 return r; 5782 5783 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5784 if (r) 5785 return r; 5786 5787 /* requires IBs so do in late init after IB pool is initialized */ 5788 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5789 if (r) 5790 return r; 5791 5792 amdgpu_set_powergating_state(adev, 5793 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE); 5794 5795 return 0; 5796 } 5797 5798 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5799 bool enable) 5800 { 5801 if ((adev->asic_type == CHIP_POLARIS11) || 5802 (adev->asic_type == CHIP_POLARIS12)) 5803 /* Send msg to SMU via Powerplay */ 5804 amdgpu_set_powergating_state(adev, 5805 AMD_IP_BLOCK_TYPE_SMC, 5806 enable ? 5807 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE); 5808 5809 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5810 } 5811 5812 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5813 bool enable) 5814 { 5815 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5816 } 5817 5818 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5819 bool enable) 5820 { 5821 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5822 } 5823 5824 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5825 bool enable) 5826 { 5827 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5828 } 5829 5830 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5831 bool enable) 5832 { 5833 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5834 5835 /* Read any GFX register to wake up GFX. 
*/ 5836 if (!enable) 5837 RREG32(mmDB_RENDER_CONTROL); 5838 } 5839 5840 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5841 bool enable) 5842 { 5843 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5844 cz_enable_gfx_cg_power_gating(adev, true); 5845 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5846 cz_enable_gfx_pipeline_power_gating(adev, true); 5847 } else { 5848 cz_enable_gfx_cg_power_gating(adev, false); 5849 cz_enable_gfx_pipeline_power_gating(adev, false); 5850 } 5851 } 5852 5853 static int gfx_v8_0_set_powergating_state(void *handle, 5854 enum amd_powergating_state state) 5855 { 5856 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5857 bool enable = (state == AMD_PG_STATE_GATE); 5858 5859 if (amdgpu_sriov_vf(adev)) 5860 return 0; 5861 5862 switch (adev->asic_type) { 5863 case CHIP_CARRIZO: 5864 case CHIP_STONEY: 5865 5866 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5867 cz_enable_sck_slow_down_on_power_up(adev, true); 5868 cz_enable_sck_slow_down_on_power_down(adev, true); 5869 } else { 5870 cz_enable_sck_slow_down_on_power_up(adev, false); 5871 cz_enable_sck_slow_down_on_power_down(adev, false); 5872 } 5873 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5874 cz_enable_cp_power_gating(adev, true); 5875 else 5876 cz_enable_cp_power_gating(adev, false); 5877 5878 cz_update_gfx_cg_power_gating(adev, enable); 5879 5880 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5881 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5882 else 5883 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5884 5885 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5886 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5887 else 5888 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5889 break; 5890 case CHIP_POLARIS11: 5891 case CHIP_POLARIS12: 5892 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5893 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5894 else 5895 
gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5896 5897 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5898 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5899 else 5900 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5901 5902 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5903 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5904 else 5905 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5906 break; 5907 default: 5908 break; 5909 } 5910 5911 return 0; 5912 } 5913 5914 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5915 { 5916 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5917 int data; 5918 5919 if (amdgpu_sriov_vf(adev)) 5920 *flags = 0; 5921 5922 /* AMD_CG_SUPPORT_GFX_MGCG */ 5923 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5924 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5925 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5926 5927 /* AMD_CG_SUPPORT_GFX_CGLG */ 5928 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5929 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5930 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5931 5932 /* AMD_CG_SUPPORT_GFX_CGLS */ 5933 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5934 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5935 5936 /* AMD_CG_SUPPORT_GFX_CGTS */ 5937 data = RREG32(mmCGTS_SM_CTRL_REG); 5938 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5939 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5940 5941 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5942 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5943 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5944 5945 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5946 data = RREG32(mmRLC_MEM_SLP_CNTL); 5947 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5948 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5949 5950 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5951 data = RREG32(mmCP_MEM_SLP_CNTL); 5952 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5953 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5954 } 5955 5956 static void 
/**
 * iceland_enter_rlc_safe_mode - request RLC safe mode
 *
 * @adev: amdgpu_device pointer
 *
 * Puts the RLC into safe mode before clock-gating state is touched.
 * Does nothing if the RLC F32 core is not running, or if neither CGCG
 * nor MGCG is supported.  Sets adev->gfx.rlc.in_safe_mode on success.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* safe mode only makes sense while the RLC firmware is running */
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* CMD=1, MESSAGE=1: enter-safe-mode request.
		 * NOTE(review): 'data' still carries the RLC_CNTL read when it
		 * is written to RLC_SAFE_MODE here - confirm against the
		 * RLC_SAFE_MODE register spec that the extra bits are benign.
		 */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* poll until both GFX clock and power status report up */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* poll until the RLC acknowledges the command (CMD clears) */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
/**
 * gfx_v8_0_update_medium_grain_clock_gating - toggle MGCG/MGLS/CGTS
 *
 * @adev: amdgpu_device pointer
 * @enable: true to enable medium-grain clock gating, false to disable
 *
 * Enables or disables medium grain clock gating (and the related memory
 * light sleep / tree-shade features) under RLC safe mode.  The numbered
 * steps and the serdes waits between them are order-sensitive.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: drop the MGCG override bits
		 * (APUs keep the GRBM override in place)
		 */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			/* LS override only dropped when both MGLS and CGTS_LS are on */
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6186 6187 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6188 gfx_v8_0_wait_for_rlc_serdes(adev); 6189 6190 /* 2 - clear cgcg override */ 6191 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 6192 6193 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6194 gfx_v8_0_wait_for_rlc_serdes(adev); 6195 6196 /* 3 - write cmd to set CGLS */ 6197 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 6198 6199 /* 4 - enable cgcg */ 6200 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 6201 6202 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6203 /* enable cgls*/ 6204 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6205 6206 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6207 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 6208 6209 if (temp1 != data1) 6210 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6211 } else { 6212 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6213 } 6214 6215 if (temp != data) 6216 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6217 6218 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/ 6219 * Cmp_busy/GFX_Idle interrupts 6220 */ 6221 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6222 } else { 6223 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 6224 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 6225 6226 /* TEST CGCG */ 6227 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6228 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 6229 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 6230 if (temp1 != data1) 6231 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6232 6233 /* read gfx register to wake up cgcg */ 6234 RREG32(mmCB_CGTT_SCLK_CTRL); 6235 RREG32(mmCB_CGTT_SCLK_CTRL); 6236 RREG32(mmCB_CGTT_SCLK_CTRL); 6237 RREG32(mmCB_CGTT_SCLK_CTRL); 6238 6239 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6240 gfx_v8_0_wait_for_rlc_serdes(adev); 6241 6242 /* write cmd to Set CGCG Overrride */ 6243 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 6244 6245 /* wait for 
RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6246 gfx_v8_0_wait_for_rlc_serdes(adev); 6247 6248 /* write cmd to Clear CGLS */ 6249 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 6250 6251 /* disable cgcg, cgls should be disabled too. */ 6252 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 6253 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 6254 if (temp != data) 6255 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6256 } 6257 6258 gfx_v8_0_wait_for_rlc_serdes(adev); 6259 6260 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6261 } 6262 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 6263 bool enable) 6264 { 6265 if (enable) { 6266 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 6267 * === MGCG + MGLS + TS(CG/LS) === 6268 */ 6269 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6270 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6271 } else { 6272 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 6273 * === CGCG + CGLS === 6274 */ 6275 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6276 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6277 } 6278 return 0; 6279 } 6280 6281 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 6282 enum amd_clockgating_state state) 6283 { 6284 uint32_t msg_id, pp_state = 0; 6285 uint32_t pp_support_state = 0; 6286 void *pp_handle = adev->powerplay.pp_handle; 6287 6288 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6289 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6290 pp_support_state = PP_STATE_SUPPORT_LS; 6291 pp_state = PP_STATE_LS; 6292 } 6293 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6294 pp_support_state |= PP_STATE_SUPPORT_CG; 6295 pp_state |= PP_STATE_CG; 6296 } 6297 if (state == AMD_CG_STATE_UNGATE) 6298 pp_state = 0; 6299 6300 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6301 PP_BLOCK_GFX_CG, 6302 pp_support_state, 6303 pp_state); 6304 amd_set_clockgating_by_smu(pp_handle, msg_id); 6305 } 6306 
	/* medium grain gating (MGCG/MGLS) block.
	 * NOTE(review): pp_support_state/pp_state are not reset to 0 before
	 * this block, so when only MGCG is set the LS bits accumulated by the
	 * CG block above leak into this message — confirm this is intended.
	 */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}

/*
 * gfx_v8_0_polaris_update_gfx_clock_gating - Polaris clock gating via SMU
 * messages.
 *
 * Like the Tonga variant, but additionally programs the 3D (CGCG/CGLS),
 * RLC light sleep and CP light sleep blocks.  For each block supported in
 * adev->cg_flags a PP_CG_MSG_ID is built and sent with
 * amd_set_clockgating_by_smu(); AMD_CG_STATE_UNGATE forces the requested
 * state to 0 (ungated).  Always returns 0.
 */
static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
			enum amd_clockgating_state state)
{

	uint32_t msg_id, pp_state = 0;
	uint32_t pp_support_state = 0;
	void *pp_handle = adev->powerplay.pp_handle;

	/* coarse grain gating (CGCG/CGLS) block */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* 3D CGCG/CGLS block */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}
		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_3D,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* medium grain gating (MGCG/MGLS) block.
	 * NOTE(review): pp_support_state/pp_state are not reset to 0 before
	 * this block, so prior blocks' bits can leak in when only one of
	 * MGCG/MGLS is set — confirm this is intended.
	 */
	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			pp_support_state = PP_STATE_SUPPORT_LS;
			pp_state = PP_STATE_LS;
		}

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
			pp_support_state |= PP_STATE_SUPPORT_CG;
			pp_state |= PP_STATE_CG;
		}

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_MG,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* RLC light sleep: LS only */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;

		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_RLC,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	/* CP light sleep: LS only */
	if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
		pp_support_state = PP_STATE_SUPPORT_LS;

		if (state == AMD_CG_STATE_UNGATE)
			pp_state = 0;
		else
			pp_state = PP_STATE_LS;
		msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
				PP_BLOCK_GFX_CP,
				pp_support_state,
				pp_state);
		amd_set_clockgating_by_smu(pp_handle, msg_id);
	}

	return 0;
}

/*
 * gfx_v8_0_set_clockgating_state - amd_ip_funcs clockgating entry point.
 *
 * @handle: amdgpu_device pointer (opaque IP handle)
 * @state: AMD_CG_STATE_GATE or AMD_CG_STATE_UNGATE
 *
 * Dispatches to the per-ASIC clock gating routine.  SR-IOV virtual
 * functions do not control clock gating, so this is a no-op under
 * virtualization.  Always returns 0.
 */
static int gfx_v8_0_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		return 0;

	switch (adev->asic_type) {
	case CHIP_FIJI:
	case CHIP_CARRIZO:
	case CHIP_STONEY:
		gfx_v8_0_update_gfx_clock_gating(adev,
						 state == AMD_CG_STATE_GATE);
		break;
	case CHIP_TONGA:
		gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
		break;
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
		break;
	default:
		break;
	}
	return 0;
}

/* Read a ring's read pointer from its writeback slot. */
static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}

/*
 * Read the gfx ring write pointer: from the writeback slot when the ring
 * uses a doorbell, otherwise straight from CP_RB0_WPTR.
 */
static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		/* XXX check if swapping is necessary on BE */
		return ring->adev->wb.wb[ring->wptr_offs];
	else
		return RREG32(mmCP_RB0_WPTR);
}

/*
 * Publish the gfx ring write pointer: writeback slot + doorbell when
 * doorbells are in use, otherwise CP_RB0_WPTR followed by a readback.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
		(void)RREG32(mmCP_RB0_WPTR);
	}
}

/*
 * Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for the
 * matching GPU_HDP_FLUSH_DONE bit.  Compute/KIQ rings derive their CPn bit
 * from ring->me/ring->pipe; the gfx ring uses CP0 on the PFP engine.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}

/* Emit VS partial flush + VGT flush events. */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}


/*
 * Invalidate HDP by writing 1 to HDP_DEBUG0 through a confirmed WRITE_DATA
 * packet.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}

/*
 * Emit an indirect buffer on the gfx ring.
 *
 * CE IBs use INDIRECT_BUFFER_CONST; under SR-IOV a preemptible IB gets
 * INDIRECT_BUFFER_PRE_ENB set.  @vm_id is carried in bits 24+ of the
 * control dword, the IB length in the low bits.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib,
				      unsigned vm_id, bool ctx_switch)
{
	u32 header, control = 0;

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	control |= ib->length_dw | (vm_id << 24);

	if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT)
		control |= INDIRECT_BUFFER_PRE_ENB(1);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

/* Emit an indirect buffer on a compute/KIQ ring. */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}

/*
 * Emit a gfx fence: EVENT_WRITE_EOP flushes the TC/TCL1 caches, writes
 * @seq (32 or 64 bit depending on @flags) to @addr and optionally raises
 * an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}

/*
 * Wait (WAIT_REG_MEM on memory) until the ring's fence area reaches the
 * last synced sequence number; gfx rings wait on the PFP, compute on the
 * ME.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */
}

/*
 * Emit a VM flush: write the page table base for @vm_id, kick
 * VM_INVALIDATE_REQUEST for that context, wait for the invalidate to
 * complete, then (gfx ring only) sync the PFP to the ME.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	/* contexts 0-7 and 8-15 live in two separate register banks */
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
	}
}

/* Compute rings always read the write pointer from the writeback slot. */
static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}

/* Publish the compute ring write pointer: writeback slot + doorbell. */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
}

/*
 * Emit a compute fence: RELEASE_MEM flushes the TC/TCL1 caches, writes
 * @seq (32 or 64 bit depending on @flags) to @addr and optionally raises
 * an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

/*
 * Emit a KIQ fence: confirmed WRITE_DATA of the 32-bit @seq to @addr, plus
 * an optional CPC_INT_STATUS write to trigger the GENERIC2 interrupt.
 * 64-bit fences are not supported on the KIQ.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

/* Emit a SWITCH_BUFFER packet. */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

/*
 * Emit a CONTEXT_CONTROL packet.  dw2 selects which state groups the CP
 * (re)loads; under SR-IOV CE/DE metadata packets are emitted around it.
 */
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);

	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time preamble presented
		 * although there is no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_de_meta_init(ring,
			(flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
}

/*
 * Emit a COND_EXEC packet and return the ring offset of its size dword so
 * gfx_v8_0_ring_emit_patch_cond_exec() can patch in the real skip count
 * later.
 */
static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
	ret = ring->wptr & ring->buf_mask;
	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
	return ret;
}

/*
 * Patch the dummy dword written by gfx_v8_0_ring_emit_init_cond_exec()
 * with the dword count between @offset and the current wptr, handling
 * ring-buffer wrap-around.
 */
static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
{
	unsigned cur;

	BUG_ON(offset > ring->buf_mask);
	BUG_ON(ring->ring[offset] != 0x55aa55aa);

	cur = (ring->wptr & ring->buf_mask) - 1;
	if (likely(cur > offset))
		ring->ring[offset] = cur - offset;
	else
		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
}


/*
 * Emit a COPY_DATA packet copying @reg into the SR-IOV register-read
 * writeback slot (adev->virt.reg_val_offs) with write confirm.
 */
static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |	/* src: register*/
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				adev->virt.reg_val_offs * 4));
}

/* Emit a WRITE_DATA packet writing @val to register @reg. */
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void
gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
				     enum amdgpu_interrupt_state state)
{
	/* gfx ring end-of-pipe (fence) interrupt enable */
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

/* Enable/disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

/* Enable/disable the illegal-instruction interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

/*
 * amdgpu_irq_src .set callback: route an EOP interrupt enable request to
 * the gfx ring or to the right MEC me/pipe.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}

/*
 * EOP interrupt handler: decode me/pipe/queue from the IV entry ring_id
 * and process the fence on the matching ring.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring =
			       &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			 * The interrupt can only be enabled/disabled per pipe instead of per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

/* Privileged register fault: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

/* Illegal instruction: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}

/*
 * Enable/disable the KIQ GENERIC2 interrupt in CPC_INT_CNTL and in the
 * pipe-specific CP_MEn_PIPEm_INT_CNTL register.  Only
 * AMDGPU_CP_KIQ_IRQ_DRIVER0 (GENERIC2) is supported.
 */
static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned int type,
					    enum amdgpu_interrupt_state state)
{
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);

	switch (type) {
	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		if (ring->me == 1)
			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		else
			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
					    ring->pipe,
					    GENERIC2_INT_ENABLE,
					    state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
		break;
	default:
		BUG(); /* kiq only support GENERIC2_INT now */
		break;
	}
	return 0;
}

/* KIQ GENERIC2 interrupt handler: process the KIQ ring's fence. */
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);

	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		   me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}

/* IP-block level callbacks for the gfx v8 block. */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};

/* Ring callbacks for the GFX ring. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 + /* COND_EXEC */
		7 + /* 
PIPELINE_SYNC */
		19 + /* VM_FLUSH */
		8 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jump to the place just
		       prior to this double SWITCH_BUFFER  */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2, /* SWITCH_BUFFER */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
};

/* Ring callbacks for user compute rings. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};

/* Ring callbacks for the kernel interface queue (KIQ). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};

/* Hook up the ring function tables for the KIQ, gfx and compute rings. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < 
adev->gfx.num_compute_rings; i++) 7156 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 7157 } 7158 7159 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 7160 .set = gfx_v8_0_set_eop_interrupt_state, 7161 .process = gfx_v8_0_eop_irq, 7162 }; 7163 7164 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 7165 .set = gfx_v8_0_set_priv_reg_fault_state, 7166 .process = gfx_v8_0_priv_reg_irq, 7167 }; 7168 7169 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 7170 .set = gfx_v8_0_set_priv_inst_fault_state, 7171 .process = gfx_v8_0_priv_inst_irq, 7172 }; 7173 7174 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { 7175 .set = gfx_v8_0_kiq_set_interrupt_state, 7176 .process = gfx_v8_0_kiq_irq, 7177 }; 7178 7179 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 7180 { 7181 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7182 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 7183 7184 adev->gfx.priv_reg_irq.num_types = 1; 7185 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 7186 7187 adev->gfx.priv_inst_irq.num_types = 1; 7188 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 7189 7190 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; 7191 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; 7192 } 7193 7194 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 7195 { 7196 adev->gfx.rlc.funcs = &iceland_rlc_funcs; 7197 } 7198 7199 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 7200 { 7201 /* init asci gds info */ 7202 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 7203 adev->gds.gws.total_size = 64; 7204 adev->gds.oa.total_size = 16; 7205 7206 if (adev->gds.mem.total_size == 64 * 1024) { 7207 adev->gds.mem.gfx_partition_size = 4096; 7208 adev->gds.mem.cs_partition_size = 4096; 7209 7210 adev->gds.gws.gfx_partition_size = 4; 7211 adev->gds.gws.cs_partition_size = 4; 7212 7213 adev->gds.oa.gfx_partition_size = 
4; 7214 adev->gds.oa.cs_partition_size = 1; 7215 } else { 7216 adev->gds.mem.gfx_partition_size = 1024; 7217 adev->gds.mem.cs_partition_size = 1024; 7218 7219 adev->gds.gws.gfx_partition_size = 16; 7220 adev->gds.gws.cs_partition_size = 16; 7221 7222 adev->gds.oa.gfx_partition_size = 4; 7223 adev->gds.oa.cs_partition_size = 4; 7224 } 7225 } 7226 7227 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7228 u32 bitmap) 7229 { 7230 u32 data; 7231 7232 if (!bitmap) 7233 return; 7234 7235 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7236 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7237 7238 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data); 7239 } 7240 7241 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7242 { 7243 u32 data, mask; 7244 7245 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | 7246 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 7247 7248 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh); 7249 7250 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; 7251 } 7252 7253 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) 7254 { 7255 int i, j, k, counter, active_cu_number = 0; 7256 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7257 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 7258 unsigned disable_masks[4 * 2]; 7259 7260 memset(cu_info, 0, sizeof(*cu_info)); 7261 7262 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); 7263 7264 mutex_lock(&adev->grbm_idx_mutex); 7265 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7266 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7267 mask = 1; 7268 ao_bitmap = 0; 7269 counter = 0; 7270 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 7271 if (i < 4 && j < 2) 7272 gfx_v8_0_set_user_cu_inactive_bitmap( 7273 adev, disable_masks[i * 2 + j]); 7274 bitmap = gfx_v8_0_get_cu_active_bitmap(adev); 7275 cu_info->bitmap[i][j] = bitmap; 7276 7277 for (k = 0; k < 16; k ++) { 7278 if (bitmap & mask) { 7279 if 
(counter < 2) 7280 ao_bitmap |= mask; 7281 counter ++; 7282 } 7283 mask <<= 1; 7284 } 7285 active_cu_number += counter; 7286 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7287 } 7288 } 7289 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7290 mutex_unlock(&adev->grbm_idx_mutex); 7291 7292 cu_info->number = active_cu_number; 7293 cu_info->ao_cu_mask = ao_cu_mask; 7294 } 7295 7296 const struct amdgpu_ip_block_version gfx_v8_0_ip_block = 7297 { 7298 .type = AMD_IP_BLOCK_TYPE_GFX, 7299 .major = 8, 7300 .minor = 0, 7301 .rev = 0, 7302 .funcs = &gfx_v8_0_ip_funcs, 7303 }; 7304 7305 const struct amdgpu_ip_block_version gfx_v8_1_ip_block = 7306 { 7307 .type = AMD_IP_BLOCK_TYPE_GFX, 7308 .major = 8, 7309 .minor = 1, 7310 .rev = 0, 7311 .funcs = &gfx_v8_0_ip_funcs, 7312 }; 7313 7314 static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) 7315 { 7316 uint64_t ce_payload_addr; 7317 int cnt_ce; 7318 static union { 7319 struct vi_ce_ib_state regular; 7320 struct vi_ce_ib_state_chained_ib chained; 7321 } ce_payload = {}; 7322 7323 if (ring->adev->virt.chained_ib_support) { 7324 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); 7325 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; 7326 } else { 7327 ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload); 7328 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; 7329 } 7330 7331 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); 7332 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 7333 WRITE_DATA_DST_SEL(8) | 7334 WR_CONFIRM) | 7335 WRITE_DATA_CACHE_POLICY(0)); 7336 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr)); 7337 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr)); 7338 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); 7339 } 7340 7341 static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) 7342 { 7343 uint64_t de_payload_addr, gds_addr; 7344 
int cnt_de; 7345 static union { 7346 struct vi_de_ib_state regular; 7347 struct vi_de_ib_state_chained_ib chained; 7348 } de_payload = {}; 7349 7350 gds_addr = csa_addr + 4096; 7351 if (ring->adev->virt.chained_ib_support) { 7352 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); 7353 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr); 7354 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload); 7355 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2; 7356 } else { 7357 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr); 7358 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr); 7359 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload); 7360 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2; 7361 } 7362 7363 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de)); 7364 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 7365 WRITE_DATA_DST_SEL(8) | 7366 WR_CONFIRM) | 7367 WRITE_DATA_CACHE_POLICY(0)); 7368 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr)); 7369 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); 7370 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); 7371 } 7372 7373 /* create MQD for each compute queue */ 7374 static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev) 7375 { 7376 struct amdgpu_ring *ring = NULL; 7377 int r, i; 7378 7379 /* create MQD for KIQ */ 7380 ring = &adev->gfx.kiq.ring; 7381 if (!ring->mqd_obj) { 7382 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, 7383 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 7384 &ring->mqd_gpu_addr, &ring->mqd_ptr); 7385 if (r) { 7386 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); 7387 return r; 7388 } 7389 7390 /* prepare MQD backup */ 7391 adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); 7392 if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) 7393 dev_warn(adev->dev, "no memory 
to create MQD backup for ring %s\n", ring->name); 7394 } 7395 7396 /* create MQD for each KCQ */ 7397 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 7398 ring = &adev->gfx.compute_ring[i]; 7399 if (!ring->mqd_obj) { 7400 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, 7401 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, 7402 &ring->mqd_gpu_addr, &ring->mqd_ptr); 7403 if (r) { 7404 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); 7405 return r; 7406 } 7407 7408 /* prepare MQD backup */ 7409 adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); 7410 if (!adev->gfx.mec.mqd_backup[i]) 7411 dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); 7412 } 7413 } 7414 7415 return 0; 7416 } 7417 7418 static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev) 7419 { 7420 struct amdgpu_ring *ring = NULL; 7421 int i; 7422 7423 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 7424 ring = &adev->gfx.compute_ring[i]; 7425 kfree(adev->gfx.mec.mqd_backup[i]); 7426 amdgpu_bo_free_kernel(&ring->mqd_obj, 7427 &ring->mqd_gpu_addr, 7428 &ring->mqd_ptr); 7429 } 7430 7431 ring = &adev->gfx.kiq.ring; 7432 kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); 7433 amdgpu_bo_free_kernel(&ring->mqd_obj, 7434 &ring->mqd_gpu_addr, 7435 &ring->mqd_ptr); 7436 } 7437