/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_ring.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#include "ivsrcid/ivsrcid_vislands30.h"

#define GFX8_NUM_GFX_RINGS	1
#define GFX8_MEC_HPD_SIZE	4096

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

/* helpers to assemble GB_TILE_MODE* / GB_MACROTILE_MODE* register values */
#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0

/* BPM Register Address */
enum {
	BPM_REG_CGLS_EN = 0,	/* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,	/* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,	/* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,	/* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,	/* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength	14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vegam_ce.bin");
MODULE_FIRMWARE("amdgpu/vegam_pfp.bin");
MODULE_FIRMWARE("amdgpu/vegam_me.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec.bin");
MODULE_FIRMWARE("amdgpu/vegam_mec2.bin");
MODULE_FIRMWARE("amdgpu/vegam_rlc.bin");
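
/* GDS memory base/size, GWS, and OA register offsets, indexed by VMID (0-15) */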
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

/* golden register settings: {register offset, AND mask, value} triplets,
 * consumed by amdgpu_device_program_register_sequence()
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};
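
/* MGCG/CGCG clock-gating init: broadcast GRBM_GFX_INDEX, then program the
 * per-block CGTT clock-control and per-CU CGTS control registers
 */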
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_vegam_a11[] =
{
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 vegam_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
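
/* human-readable decodings of the SQ_EDC_INFO SOURCE field, used when
 * reporting EDC (error detection and correction) events from the SQ block
 */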
static const char * const sq_edc_source_names[] = {
	"SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred",
	"SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch",
	"SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return",
	"SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR",
	"SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS",
	"SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS",
	"SQ_EDC_INFO_SOURCE_TA: EDC source is TA",
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring);
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring);

#define CG_ACLK_CNTL__ACLK_DIVIDER_MASK		0x0000007fL
#define CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT	0x00000000L

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	uint32_t data;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_device_program_register_sequence(adev,
							iceland_mgcg_cgcg_init,
							ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_iceland_a11,
							ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_device_program_register_sequence(adev,
							iceland_golden_common_all,
							ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_device_program_register_sequence(adev,
							fiji_mgcg_cgcg_init,
							ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_fiji_a10,
							ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_device_program_register_sequence(adev,
							fiji_golden_common_all,
							ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_device_program_register_sequence(adev,
							tonga_mgcg_cgcg_init,
							ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							golden_settings_tonga_a11,
							ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_device_program_register_sequence(adev,
							tonga_golden_common_all,
							ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_VEGAM:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_vegam_a11,
							ARRAY_SIZE(golden_settings_vegam_a11));
		amdgpu_device_program_register_sequence(adev,
							vegam_golden_common_all,
							ARRAY_SIZE(vegam_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris11_a11,
							ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris11_golden_common_all,
							ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_device_program_register_sequence(adev,
							golden_settings_polaris10_a11,
							ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_device_program_register_sequence(adev,
							polaris10_golden_common_all,
							ARRAY_SIZE(polaris10_golden_common_all));
		data = RREG32_SMC(ixCG_ACLK_CNTL);
		data &= ~CG_ACLK_CNTL__ACLK_DIVIDER_MASK;
		data |= 0x18 << CG_ACLK_CNTL__ACLK_DIVIDER__SHIFT;
		WREG32_SMC(ixCG_ACLK_CNTL, data);
		if ((adev->pdev->device == 0x67DF) && (adev->pdev->revision == 0xc7) &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1680))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_device_program_register_sequence(adev,
							cz_mgcg_cgcg_init,
							ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_settings_a11,
							ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							cz_golden_common_all,
							ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_device_program_register_sequence(adev,
							stoney_mgcg_cgcg_init,
							ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_settings_a11,
							ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_device_program_register_sequence(adev,
							stoney_golden_common_all,
							ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	WREG32(mmSCRATCH_REG0, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;

	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, mmSCRATCH_REG0 - PACKET3_SET_UCONFIG_REG_START);
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(mmSCRATCH_REG0);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;

	unsigned int index;
	uint64_t gpu_addr;
	uint32_t tmp;
	long r;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));

	r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err1;

	ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
	ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
	ib.ptr[2] = lower_32_bits(gpu_addr);
	ib.ptr[3] = upper_32_bits(gpu_addr);
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		goto err2;
	}

	tmp = adev->wb.wb[index];
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err2:
	amdgpu_ib_free(&ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_device_wb_free(adev, index);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	amdgpu_ucode_release(&adev->gfx.pfp_fw);
	amdgpu_ucode_release(&adev->gfx.me_fw);
	amdgpu_ucode_release(&adev->gfx.ce_fw);
	amdgpu_ucode_release(&adev->gfx.rlc_fw);
	amdgpu_ucode_release(&adev->gfx.mec_fw);
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		amdgpu_ucode_release(&adev->gfx.mec2_fw);

	kfree(adev->gfx.rlc.register_list_format);
}
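
/* fetch the CP (PFP/ME/CE/MEC), RLC and, where present, MEC2 microcode for
 * the detected ASIC; Polaris parts first try the newer *_2.bin images and
 * fall back to the original names if those are absent
 */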
static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS12:
		chip_name = "polaris12";
		break;
	case CHIP_VEGAM:
		chip_name = "vegam";
		break;
	default:
		BUG();
	}

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_pfp_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_pfp.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_pfp.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_me_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_me.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.me_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_me.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_ce_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_ce.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_ce.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	/*
	 * Support for MCBP/Virtualization in combination with chained IBs was
	 * formally released with feature version #46.
	 */
	if (adev->gfx.ce_feature_version >= 46 &&
	    adev->gfx.pfp_feature_version >= 46) {
		adev->virt.chained_ib_support = true;
		DRM_INFO("Chained IB support enabled!\n");
	} else
		adev->virt.chained_ib_support = false;

	err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
				   AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s_rlc.bin", chip_name);
	if (err)
		goto out;
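
	/* parse the v2.0 RLC header: save/restore offsets and the
	 * register-list layout used to build the restore lists below
	 */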
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
		le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
		le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
		le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
		le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
		le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
		le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
		le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
		le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
			adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			       le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
					   AMDGPU_UCODE_OPTIONAL,
					   "amdgpu/%s_mec_2.bin", chip_name);
		if (err == -ENODEV) {
			err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_mec.bin", chip_name);
		}
	} else {
		err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw,
					   AMDGPU_UCODE_REQUIRED,
					   "amdgpu/%s_mec.bin", chip_name);
	}
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) {
			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
						   AMDGPU_UCODE_OPTIONAL,
						   "amdgpu/%s_mec2_2.bin", chip_name);
			if (err == -ENODEV) {
				err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
							   AMDGPU_UCODE_REQUIRED,
							   "amdgpu/%s_mec2.bin", chip_name);
			}
		} else {
			err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw,
						   AMDGPU_UCODE_REQUIRED,
						   "amdgpu/%s_mec2.bin", chip_name);
		}
		if (!err) {
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
	info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
	info->fw = adev->gfx.pfp_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
	info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
	info->fw = adev->gfx.me_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
	info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
	info->fw = adev->gfx.ce_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
	info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
	info->fw = adev->gfx.rlc_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
	info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
	info->fw = adev->gfx.mec_fw;
	header = (const struct common_firmware_header *)info->fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

	/* we also need to account for the MEC JT (jump table) */
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->firmware.fw_size +=
		ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);

	if (amdgpu_sriov_vf(adev)) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
		info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
		info->fw = adev->gfx.mec_fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
	}

	if (adev->gfx.mec2_fw) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
		info->fw = adev->gfx.mec2_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
	}

out:
	if (err) {
		dev_err(adev->dev, "gfx8: Failed to load %s gfx firmware\n", chip_name);
		amdgpu_ucode_release(&adev->gfx.pfp_fw);
		amdgpu_ucode_release(&adev->gfx.me_fw);
		amdgpu_ucode_release(&adev->gfx.ce_fw);
		amdgpu_ucode_release(&adev->gfx.rlc_fw);
		amdgpu_ucode_release(&adev->gfx.mec_fw);
		amdgpu_ucode_release(&adev->gfx.mec2_fw);
	}
	return err;
}
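
/* build the clear-state buffer (CSB): the common preamble, the parsed
 * clear-state sections, and the SE0 raster configuration registers
 */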
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, u32 *buffer)
{
	u32 count = 0;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	count = amdgpu_gfx_csb_preamble_start(buffer);
	count = amdgpu_gfx_csb_data_parser(adev, buffer, count);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	amdgpu_gfx_csb_preamble_end(buffer, count);
}

static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev)
{
	if (adev->asic_type == CHIP_CARRIZO)
		return 5;
	else
		return 4;
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* init clear state block */
		r = amdgpu_gfx_rlc_init_csb(adev);
		if (r)
			return r;
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		r = amdgpu_gfx_rlc_init_cpt(adev);
		if (r)
			return r;
	}

	/* init spm vmid with 0xf */
	if (adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf);

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
}

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	size_t mec_hpd_size;

	bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);

	/* take ownership of the relevant compute queues */
	amdgpu_gfx_compute_queue_acquire(adev);

	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
	if (mec_hpd_size) {
		r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
					      AMDGPU_GEM_DOMAIN_VRAM |
					      AMDGPU_GEM_DOMAIN_GTT,
					      &adev->gfx.mec.hpd_eop_obj,
					      &adev->gfx.mec.hpd_eop_gpu_addr,
					      (void **)&hpd);
		if (r) {
			dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
			return r;
		}

		memset(hpd, 0, mec_hpd_size);

		amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
	}

	return 0;
}

/* raw GCN machine code that touches a block of VGPRs; dispatched by the
 * EDC GPR workaround below to initialize the register file
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
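
/* companion shader that initializes the SGPRs; dispatched twice below with
 * different COMPUTE_STATIC_THREAD_MGMT_SE0 masks to cover all CUs
 */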
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};

static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};

/* EDC SEC/DED counter registers, read back at the end of the workaround to
 * clear the counters
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};

static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->sched.ready)
		return 0;

	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size,
			  AMDGPU_IB_POOL_DIRECT, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
ib.ptr[ib.length_dw++] = 1; /* z */ 1571 ib.ptr[ib.length_dw++] = 1572 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1573 1574 /* write CS partial flush packet */ 1575 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1576 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1577 1578 /* SGPR2 */ 1579 /* write the register state for the compute dispatch */ 1580 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1581 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1582 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1583 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1584 } 1585 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1586 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1587 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1588 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1589 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1590 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1591 1592 /* write dispatch packet */ 1593 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1594 ib.ptr[ib.length_dw++] = 8; /* x */ 1595 ib.ptr[ib.length_dw++] = 1; /* y */ 1596 ib.ptr[ib.length_dw++] = 1; /* z */ 1597 ib.ptr[ib.length_dw++] = 1598 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1599 1600 /* write CS partial flush packet */ 1601 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1602 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1603 1604 /* schedule the ib on the ring */ 1605 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1606 if (r) { 1607 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1608 goto fail; 1609 } 1610 1611 /* wait for the GPU to finish processing the IB */ 1612 r = dma_fence_wait(f, false); 1613 if (r) { 1614 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1615 goto fail; 1616 } 1617 1618 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1619 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1620 WREG32(mmGB_EDC_MODE, tmp); 1621 1622 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1623 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1624 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1625 1626 1627 /* read back registers to clear the counters */ 1628 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1629 RREG32(sec_ded_counter_registers[i]); 1630 1631 fail: 1632 amdgpu_ib_free(&ib, NULL); 1633 dma_fence_put(f); 1634 1635 return r; 1636 } 1637 1638 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1639 { 1640 u32 gb_addr_config; 1641 u32 mc_arb_ramcfg; 1642 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1643 u32 tmp; 1644 int ret; 1645 1646 switch (adev->asic_type) { 1647 case CHIP_TOPAZ: 1648 adev->gfx.config.max_shader_engines = 1; 1649 adev->gfx.config.max_tile_pipes = 2; 1650 adev->gfx.config.max_cu_per_sh = 6; 1651 adev->gfx.config.max_sh_per_se = 1; 1652 adev->gfx.config.max_backends_per_se = 2; 1653 adev->gfx.config.max_texture_channel_caches = 2; 1654 adev->gfx.config.max_gprs = 256; 1655 adev->gfx.config.max_gs_threads = 32; 1656 adev->gfx.config.max_hw_contexts = 8; 1657 1658 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1659 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1660 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1661 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1662 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1663 break; 1664 case CHIP_FIJI: 1665 adev->gfx.config.max_shader_engines = 4; 1666
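/* Fiji: 4 SEs x 1 SH/SE x 16 CUs/SH (set below) = 64 CUs total. */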
adev->gfx.config.max_tile_pipes = 16; 1667 adev->gfx.config.max_cu_per_sh = 16; 1668 adev->gfx.config.max_sh_per_se = 1; 1669 adev->gfx.config.max_backends_per_se = 4; 1670 adev->gfx.config.max_texture_channel_caches = 16; 1671 adev->gfx.config.max_gprs = 256; 1672 adev->gfx.config.max_gs_threads = 32; 1673 adev->gfx.config.max_hw_contexts = 8; 1674 1675 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1676 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1677 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1678 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1679 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1680 break; 1681 case CHIP_POLARIS11: 1682 case CHIP_POLARIS12: 1683 ret = amdgpu_atombios_get_gfx_info(adev); 1684 if (ret) 1685 return ret; 1686 adev->gfx.config.max_gprs = 256; 1687 adev->gfx.config.max_gs_threads = 32; 1688 adev->gfx.config.max_hw_contexts = 8; 1689 1690 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1691 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1692 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1693 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1694 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1695 break; 1696 case CHIP_POLARIS10: 1697 case CHIP_VEGAM: 1698 ret = amdgpu_atombios_get_gfx_info(adev); 1699 if (ret) 1700 return ret; 1701 adev->gfx.config.max_gprs = 256; 1702 adev->gfx.config.max_gs_threads = 32; 1703 adev->gfx.config.max_hw_contexts = 8; 1704 1705 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1706 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1707 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1708 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1709 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1710 break; 1711 case CHIP_TONGA: 1712 adev->gfx.config.max_shader_engines = 4; 1713 adev->gfx.config.max_tile_pipes = 8; 1714 adev->gfx.config.max_cu_per_sh = 8; 1715 adev->gfx.config.max_sh_per_se = 1; 1716 adev->gfx.config.max_backends_per_se = 2; 1717 adev->gfx.config.max_texture_channel_caches = 8; 1718 adev->gfx.config.max_gprs = 256; 1719 adev->gfx.config.max_gs_threads = 32; 1720 adev->gfx.config.max_hw_contexts = 8; 1721 1722 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1723 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1724 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1725 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1726 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1727 break; 1728 case CHIP_CARRIZO: 1729 adev->gfx.config.max_shader_engines = 1; 1730 adev->gfx.config.max_tile_pipes = 2; 1731 adev->gfx.config.max_sh_per_se = 1; 1732 adev->gfx.config.max_backends_per_se = 2; 1733 adev->gfx.config.max_cu_per_sh = 8; 1734 adev->gfx.config.max_texture_channel_caches = 2; 1735 adev->gfx.config.max_gprs = 256; 1736 adev->gfx.config.max_gs_threads = 32; 1737 adev->gfx.config.max_hw_contexts = 8; 1738 1739 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1740 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1741 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1742 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1743 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1744 break; 1745 case CHIP_STONEY: 1746 adev->gfx.config.max_shader_engines = 1; 1747 adev->gfx.config.max_tile_pipes = 2; 1748 adev->gfx.config.max_sh_per_se = 1; 1749 adev->gfx.config.max_backends_per_se = 1; 1750 adev->gfx.config.max_cu_per_sh = 3; 1751 adev->gfx.config.max_texture_channel_caches = 2; 1752 adev->gfx.config.max_gprs = 256; 1753 adev->gfx.config.max_gs_threads = 16; 1754 
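/* Stoney: a single-SE, 3-CU part; note max_gs_threads is 16 here vs 32 on the larger ASICs above. */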
adev->gfx.config.max_hw_contexts = 8; 1755 1756 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1757 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1758 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1759 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1760 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1761 break; 1762 default: 1763 adev->gfx.config.max_shader_engines = 2; 1764 adev->gfx.config.max_tile_pipes = 4; 1765 adev->gfx.config.max_cu_per_sh = 2; 1766 adev->gfx.config.max_sh_per_se = 1; 1767 adev->gfx.config.max_backends_per_se = 2; 1768 adev->gfx.config.max_texture_channel_caches = 4; 1769 adev->gfx.config.max_gprs = 256; 1770 adev->gfx.config.max_gs_threads = 32; 1771 adev->gfx.config.max_hw_contexts = 8; 1772 1773 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1774 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1775 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1776 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1777 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1778 break; 1779 } 1780 1781 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1782 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1783 1784 adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg, 1785 MC_ARB_RAMCFG, NOOFBANK); 1786 adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg, 1787 MC_ARB_RAMCFG, NOOFRANKS); 1788 1789 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1790 adev->gfx.config.mem_max_burst_length_bytes = 256; 1791 if (adev->flags & AMD_IS_APU) { 1792 /* Get memory bank mapping mode. */ 1793 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 1794 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1795 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1796 1797 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 1798 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1799 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1800 1801 /* Validate settings in case only one DIMM is installed. */ 1802 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 1803 dimm00_addr_map = 0; 1804 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 1805 dimm01_addr_map = 0; 1806 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 1807 dimm10_addr_map = 0; 1808 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 1809 dimm11_addr_map = 0; 1810 1811 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 1812 /* If ROW size(DIMM1) != ROW size(DIMM0), use the larger ROW size. */
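/*
 * An addr_map value of 11 is the 8 GB mapping referred to above;
 * OR-ing the check across all four DIMM fields below picks the
 * larger (2 KB) row size whenever any installed DIMM uses it.
 */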
1813 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1814 adev->gfx.config.mem_row_size_in_kb = 2; 1815 else 1816 adev->gfx.config.mem_row_size_in_kb = 1; 1817 } else { 1818 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1819 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1820 if (adev->gfx.config.mem_row_size_in_kb > 4) 1821 adev->gfx.config.mem_row_size_in_kb = 4; 1822 } 1823 1824 adev->gfx.config.shader_engine_tile_size = 32; 1825 adev->gfx.config.num_gpus = 1; 1826 adev->gfx.config.multi_gpu_tile_size = 64; 1827 1828 /* fix up row size */ 1829 switch (adev->gfx.config.mem_row_size_in_kb) { 1830 case 1: 1831 default: 1832 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1833 break; 1834 case 2: 1835 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1836 break; 1837 case 4: 1838 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1839 break; 1840 } 1841 adev->gfx.config.gb_addr_config = gb_addr_config; 1842 1843 return 0; 1844 } 1845 1846 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1847 int mec, int pipe, int queue) 1848 { 1849 int r; 1850 unsigned irq_type; 1851 struct amdgpu_ring *ring; 1852 unsigned int hw_prio; 1853 1854 ring = &adev->gfx.compute_ring[ring_id]; 1855 1856 /* mec0 is me1 */ 1857 ring->me = mec + 1; 1858 ring->pipe = pipe; 1859 ring->queue = queue; 1860 1861 ring->ring_obj = NULL; 1862 ring->use_doorbell = true; 1863 ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id; 1864 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 1865 + (ring_id * GFX8_MEC_HPD_SIZE); 1866 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 1867 1868 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 1869 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 1870 + ring->pipe; 1871 1872 hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
1873 AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; 1874 /* type-2 packets are deprecated on MEC, use type-3 instead */ 1875 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, 1876 hw_prio, NULL); 1877 if (r) 1878 return r; 1879 1880 1881 return 0; 1882 } 1883 1884 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work); 1885 1886 static int gfx_v8_0_sw_init(struct amdgpu_ip_block *ip_block) 1887 { 1888 int i, j, k, r, ring_id; 1889 int xcc_id = 0; 1890 struct amdgpu_ring *ring; 1891 struct amdgpu_device *adev = ip_block->adev; 1892 1893 switch (adev->asic_type) { 1894 case CHIP_TONGA: 1895 case CHIP_CARRIZO: 1896 case CHIP_FIJI: 1897 case CHIP_POLARIS10: 1898 case CHIP_POLARIS11: 1899 case CHIP_POLARIS12: 1900 case CHIP_VEGAM: 1901 adev->gfx.mec.num_mec = 2; 1902 break; 1903 case CHIP_TOPAZ: 1904 case CHIP_STONEY: 1905 default: 1906 adev->gfx.mec.num_mec = 1; 1907 break; 1908 } 1909 1910 adev->gfx.mec.num_pipe_per_mec = 4; 1911 adev->gfx.mec.num_queue_per_pipe = 8; 1912 1913 /* EOP Event */ 1914 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); 1915 if (r) 1916 return r; 1917 1918 /* Privileged reg */ 1919 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT, 1920 &adev->gfx.priv_reg_irq); 1921 if (r) 1922 return r; 1923 1924 /* Privileged inst */ 1925 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT, 1926 &adev->gfx.priv_inst_irq); 1927 if (r) 1928 return r; 1929 1930 /* Add CP EDC/ECC irq */ 1931 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, 1932 &adev->gfx.cp_ecc_error_irq); 1933 if (r) 1934 return r; 1935 1936 /* SQ interrupts. 
*/ 1937 r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, 1938 &adev->gfx.sq_irq); 1939 if (r) { 1940 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r); 1941 return r; 1942 } 1943 1944 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func); 1945 1946 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 1947 1948 r = gfx_v8_0_init_microcode(adev); 1949 if (r) { 1950 DRM_ERROR("Failed to load gfx firmware!\n"); 1951 return r; 1952 } 1953 1954 r = adev->gfx.rlc.funcs->init(adev); 1955 if (r) { 1956 DRM_ERROR("Failed to init rlc BOs!\n"); 1957 return r; 1958 } 1959 1960 r = gfx_v8_0_mec_init(adev); 1961 if (r) { 1962 DRM_ERROR("Failed to init MEC BOs!\n"); 1963 return r; 1964 } 1965 1966 /* set up the gfx ring */ 1967 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 1968 ring = &adev->gfx.gfx_ring[i]; 1969 ring->ring_obj = NULL; 1970 sprintf(ring->name, "gfx"); 1971 /* no gfx doorbells on iceland */ 1972 if (adev->asic_type != CHIP_TOPAZ) { 1973 ring->use_doorbell = true; 1974 ring->doorbell_index = adev->doorbell_index.gfx_ring0; 1975 } 1976 1977 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 1978 AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, 1979 AMDGPU_RING_PRIO_DEFAULT, NULL); 1980 if (r) 1981 return r; 1982 } 1983 1984 1985 /* set up the compute queues - allocate horizontally across pipes */ 1986 ring_id = 0; 1987 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 1988 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 1989 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 1990 if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, 1991 k, j)) 1992 continue; 1993 1994 r = gfx_v8_0_compute_ring_init(adev, 1995 ring_id, 1996 i, k, j); 1997 if (r) 1998 return r; 1999 2000 ring_id++; 2001 } 2002 } 2003 } 2004 2005 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE, 0); 2006 if (r) { 2007 DRM_ERROR("Failed to init KIQ BOs!\n"); 2008 return r; 2009 } 2010 2011 r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); 2012 if (r) 2013 return r; 2014 2015 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2016 r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation), 0); 2017 if (r) 2018 return r; 2019 2020 adev->gfx.ce_ram_size = 0x8000; 2021 2022 r = gfx_v8_0_gpu_early_init(adev); 2023 if (r) 2024 return r; 2025 2026 return 0; 2027 } 2028 2029 static int gfx_v8_0_sw_fini(struct amdgpu_ip_block *ip_block) 2030 { 2031 struct amdgpu_device *adev = ip_block->adev; 2032 int i; 2033 2034 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2035 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2036 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2037 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2038 2039 amdgpu_gfx_mqd_sw_fini(adev, 0); 2040 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); 2041 amdgpu_gfx_kiq_fini(adev, 0); 2042 2043 gfx_v8_0_mec_fini(adev); 2044 amdgpu_gfx_rlc_fini(adev); 2045 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2046 &adev->gfx.rlc.clear_state_gpu_addr, 2047 (void **)&adev->gfx.rlc.cs_ptr); 2048 if ((adev->asic_type == CHIP_CARRIZO) || 2049 (adev->asic_type == CHIP_STONEY)) { 2050 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2051 &adev->gfx.rlc.cp_table_gpu_addr, 2052 (void **)&adev->gfx.rlc.cp_table_ptr); 2053 } 2054 gfx_v8_0_free_microcode(adev); 2055 2056 return 0; 2057 } 2058 2059 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2060 { 2061 uint32_t *modearray, *mod2array; 2062 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2063 const u32 
num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2064 u32 reg_offset; 2065 2066 modearray = adev->gfx.config.tile_mode_array; 2067 mod2array = adev->gfx.config.macrotile_mode_array; 2068 2069 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2070 modearray[reg_offset] = 0; 2071 2072 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2073 mod2array[reg_offset] = 0; 2074 2075 switch (adev->asic_type) { 2076 case CHIP_TOPAZ: 2077 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2078 PIPE_CONFIG(ADDR_SURF_P2) | 2079 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2080 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2081 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2082 PIPE_CONFIG(ADDR_SURF_P2) | 2083 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2084 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2085 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2086 PIPE_CONFIG(ADDR_SURF_P2) | 2087 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2088 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2089 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2090 PIPE_CONFIG(ADDR_SURF_P2) | 2091 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2092 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2093 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2094 PIPE_CONFIG(ADDR_SURF_P2) | 2095 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2096 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2097 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2098 PIPE_CONFIG(ADDR_SURF_P2) | 2099 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2100 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2101 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2102 PIPE_CONFIG(ADDR_SURF_P2) | 2103 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2104 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2105 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2106 PIPE_CONFIG(ADDR_SURF_P2)); 2107 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2108 PIPE_CONFIG(ADDR_SURF_P2) | 2109 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2111 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2112 PIPE_CONFIG(ADDR_SURF_P2) | 2113 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2115 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2116 PIPE_CONFIG(ADDR_SURF_P2) | 2117 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2119 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2120 PIPE_CONFIG(ADDR_SURF_P2) | 2121 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2122 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2123 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2124 PIPE_CONFIG(ADDR_SURF_P2) | 2125 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2126 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2127 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2128 PIPE_CONFIG(ADDR_SURF_P2) | 2129 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2130 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2131 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2132 PIPE_CONFIG(ADDR_SURF_P2) | 2133 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2134 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2135 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2136 PIPE_CONFIG(ADDR_SURF_P2) | 2137 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2138 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2139 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2140 
PIPE_CONFIG(ADDR_SURF_P2) | 2141 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2143 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2144 PIPE_CONFIG(ADDR_SURF_P2) | 2145 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2146 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2147 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2148 PIPE_CONFIG(ADDR_SURF_P2) | 2149 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2150 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2151 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2152 PIPE_CONFIG(ADDR_SURF_P2) | 2153 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2155 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2156 PIPE_CONFIG(ADDR_SURF_P2) | 2157 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2159 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2160 PIPE_CONFIG(ADDR_SURF_P2) | 2161 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2162 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2163 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2164 PIPE_CONFIG(ADDR_SURF_P2) | 2165 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2167 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2168 PIPE_CONFIG(ADDR_SURF_P2) | 2169 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2170 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2171 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2172 PIPE_CONFIG(ADDR_SURF_P2) | 2173 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2175 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2176 PIPE_CONFIG(ADDR_SURF_P2) | 2177 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2178 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2179 2180 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2183 NUM_BANKS(ADDR_SURF_8_BANK)); 2184 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2185 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2186 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2187 NUM_BANKS(ADDR_SURF_8_BANK)); 2188 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2189 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2190 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2191 NUM_BANKS(ADDR_SURF_8_BANK)); 2192 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2193 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2194 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2195 NUM_BANKS(ADDR_SURF_8_BANK)); 2196 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2197 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2198 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2199 NUM_BANKS(ADDR_SURF_8_BANK)); 2200 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2201 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2202 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2203 NUM_BANKS(ADDR_SURF_8_BANK)); 2204 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2205 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2206 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2207 NUM_BANKS(ADDR_SURF_8_BANK)); 2208 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2209 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2210 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2211 NUM_BANKS(ADDR_SURF_16_BANK)); 2212 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2215 
NUM_BANKS(ADDR_SURF_16_BANK)); 2216 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2217 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2218 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2219 NUM_BANKS(ADDR_SURF_16_BANK)); 2220 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2221 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2222 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2223 NUM_BANKS(ADDR_SURF_16_BANK)); 2224 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2227 NUM_BANKS(ADDR_SURF_16_BANK)); 2228 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2229 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2230 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2231 NUM_BANKS(ADDR_SURF_16_BANK)); 2232 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2233 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2234 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2235 NUM_BANKS(ADDR_SURF_8_BANK)); 2236 2237 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2238 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2239 reg_offset != 23) 2240 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2241 2242 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2243 if (reg_offset != 7) 2244 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2245 2246 break; 2247 case CHIP_FIJI: 2248 case CHIP_VEGAM: 2249 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2250 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2251 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2252 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2253 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2254 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2255 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2256 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2257 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2258 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2259 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2260 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2261 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2262 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2263 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2264 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2265 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2266 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2267 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2268 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2269 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2270 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2271 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2272 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2273 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2274 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2275 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2276 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2277 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2278 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2279 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2280 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2281 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2282 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2283 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2284 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2285 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2286 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2287 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2288 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2289 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) 
| 2290 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2291 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2292 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2293 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2294 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2295 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2296 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2297 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2298 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2299 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2300 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2301 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2302 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2303 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2304 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2305 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2306 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2307 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2308 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2309 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2310 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2311 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2312 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2313 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2315 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2316 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2317 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2319 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2320 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2321 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2323 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2324 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2325 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2327 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2328 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2329 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2331 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2332 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2333 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2335 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2336 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2337 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2339 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2340 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2341 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2343 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2344 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2345 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2347 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2348 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2349 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2351 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2352 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2353 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2355 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2356 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2357 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2359 modearray[28] 
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2360 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2361 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2363 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2364 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2365 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2367 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2368 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2369 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2371 2372 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2373 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2374 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2375 NUM_BANKS(ADDR_SURF_8_BANK)); 2376 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2377 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2378 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2379 NUM_BANKS(ADDR_SURF_8_BANK)); 2380 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2381 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2382 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2383 NUM_BANKS(ADDR_SURF_8_BANK)); 2384 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2387 NUM_BANKS(ADDR_SURF_8_BANK)); 2388 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2391 NUM_BANKS(ADDR_SURF_8_BANK)); 2392 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2395 NUM_BANKS(ADDR_SURF_8_BANK)); 2396 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2399 NUM_BANKS(ADDR_SURF_8_BANK)); 2400 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2403 NUM_BANKS(ADDR_SURF_8_BANK)); 2404 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2407 NUM_BANKS(ADDR_SURF_8_BANK)); 2408 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2411 NUM_BANKS(ADDR_SURF_8_BANK)); 2412 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2415 NUM_BANKS(ADDR_SURF_8_BANK)); 2416 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2419 NUM_BANKS(ADDR_SURF_8_BANK)); 2420 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2423 NUM_BANKS(ADDR_SURF_8_BANK)); 2424 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2425 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2426 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2427 NUM_BANKS(ADDR_SURF_4_BANK)); 2428 2429 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2430 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2431 2432 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2433 if (reg_offset != 7) 2434 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, 
mod2array[reg_offset]); 2435 2436 break; 2437 case CHIP_TONGA: 2438 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2439 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2440 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2441 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2442 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2443 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2444 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2445 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2446 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2447 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2448 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2449 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2450 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2451 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2452 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2453 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2454 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2455 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2456 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2457 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2458 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2459 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2460 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2461 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2462 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2463 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2464 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2465 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2466 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2467 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2468 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2469 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2470 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2471 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2472 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2473 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2474 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2475 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2476 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2477 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2478 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2479 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2480 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2481 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2482 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2483 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2484 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2485 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2486 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2487 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2488 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2489 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2490 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2491 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2492 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2493 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2494 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2495 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2496 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2497 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2498 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2499 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2500 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2501 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2502 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2503 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2504 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2505 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2506 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2507 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2508 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2509 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2510 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2511 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2512 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2513 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2514 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2515 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2516 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2517 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2518 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2519 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2520 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2521 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2522 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2523 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2524 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2525 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2526 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2527 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2528 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2529 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2530 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2531 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2532 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2533 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2534 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2535 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2536 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2537 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2538 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2539 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2540 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2541 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2542 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2543 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2544 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2545 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2546 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2547 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2548 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2549 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2550 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2552 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2553 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2554 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2555 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2556 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2557 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2558 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2559 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2560 2561 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2562 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2563 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2564 NUM_BANKS(ADDR_SURF_16_BANK)); 2565 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2566 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2567 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2568 NUM_BANKS(ADDR_SURF_16_BANK)); 2569 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2570 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2571 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2572 NUM_BANKS(ADDR_SURF_16_BANK)); 2573 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2574 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2575 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2576 NUM_BANKS(ADDR_SURF_16_BANK)); 2577 mod2array[4] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2578 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2579 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2580 NUM_BANKS(ADDR_SURF_16_BANK)); 2581 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2582 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2583 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2584 NUM_BANKS(ADDR_SURF_16_BANK)); 2585 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2586 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2587 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2588 NUM_BANKS(ADDR_SURF_16_BANK)); 2589 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2590 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2591 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2592 NUM_BANKS(ADDR_SURF_16_BANK)); 2593 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2594 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2595 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2596 NUM_BANKS(ADDR_SURF_16_BANK)); 2597 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2598 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2599 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2600 NUM_BANKS(ADDR_SURF_16_BANK)); 2601 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2602 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2603 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2604 NUM_BANKS(ADDR_SURF_16_BANK)); 2605 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2606 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2607 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2608 NUM_BANKS(ADDR_SURF_8_BANK)); 2609 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2610 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2611 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2612 NUM_BANKS(ADDR_SURF_4_BANK)); 2613 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2614 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2615 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2616 NUM_BANKS(ADDR_SURF_4_BANK)); 2617 2618 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2619 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2620 2621 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2622 if (reg_offset != 7) 2623 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2624 2625 break; 2626 case CHIP_POLARIS11: 2627 case CHIP_POLARIS12: 2628 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2629 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2630 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2631 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2632 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2633 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2634 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2635 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2636 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2637 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2638 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2639 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2640 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2641 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2642 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2643 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2644 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2645 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2646 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2647 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2648 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2649 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2650 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2651 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2652 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2653 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2654 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2655 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2656 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2657 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2658 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2659 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2660 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2661 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2662 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2663 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2664 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2665 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2666 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2667 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2668 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2670 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2671 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2672 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2673 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2674 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2675 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2676 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2678 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2679 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2680 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2682 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2683 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2684 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2685 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2686 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2687 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2688 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2689 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2690 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2691 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2692 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2694 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2695 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2696 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2698 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2699 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2700 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2702 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2703 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2704 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2706 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2707 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2708 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2710 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2711 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2712 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2714 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2715 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2716 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2717 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2718 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2719 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2720 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2721 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2722 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2723 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2724 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2725 
SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2726 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2727 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2728 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2729 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2730 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2732 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2734 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2735 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2736 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2738 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2739 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2740 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2742 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2743 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2744 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2745 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2746 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2747 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2748 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2749 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2750 2751 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2752 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2753 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2754 NUM_BANKS(ADDR_SURF_16_BANK)); 2755 2756 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2757 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2758 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2759 NUM_BANKS(ADDR_SURF_16_BANK)); 2760 2761 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2762 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2763 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2764 NUM_BANKS(ADDR_SURF_16_BANK)); 2765 2766 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2767 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2768 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2769 NUM_BANKS(ADDR_SURF_16_BANK)); 2770 2771 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2772 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2773 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2774 NUM_BANKS(ADDR_SURF_16_BANK)); 2775 2776 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2777 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2778 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2779 NUM_BANKS(ADDR_SURF_16_BANK)); 2780 2781 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2782 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2783 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2784 NUM_BANKS(ADDR_SURF_16_BANK)); 2785 2786 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2789 NUM_BANKS(ADDR_SURF_16_BANK)); 2790 2791 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2792 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2793 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2794 NUM_BANKS(ADDR_SURF_16_BANK)); 2795 2796 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2797 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2798 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2799 NUM_BANKS(ADDR_SURF_16_BANK)); 2800 2801 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2802 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2803 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2804 NUM_BANKS(ADDR_SURF_16_BANK)); 2805 2806 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2807 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2808 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2809 
NUM_BANKS(ADDR_SURF_16_BANK)); 2810 2811 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2812 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2813 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2814 NUM_BANKS(ADDR_SURF_8_BANK)); 2815 2816 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2817 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2818 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2819 NUM_BANKS(ADDR_SURF_4_BANK)); 2820 2821 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2822 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2823 2824 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2825 if (reg_offset != 7) 2826 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2827 2828 break; 2829 case CHIP_POLARIS10: 2830 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2831 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2832 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2833 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2834 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2835 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2836 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2837 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2838 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2839 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2840 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2841 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2842 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2843 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2844 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2845 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2846 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2847 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2848 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2849 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2850 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2851 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2852 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2853 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2854 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2855 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2857 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2858 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2859 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2860 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2861 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2862 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2863 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2864 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2865 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2866 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2867 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2868 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2869 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2870 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2871 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2872 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2873 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2874 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2875 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2876 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2877 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2878 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2880 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2881 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2882 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2884 
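/* modearray[14]..[17] below are the 2D/3D/PRT thin-micro-tiling variants of the 1D mode above. */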
modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2885 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2886 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2888 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2889 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2890 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2892 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2893 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2894 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2896 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2897 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2898 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2900 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2901 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2902 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2904 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2905 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2906 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2908 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2909 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2910 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2912 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2913 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2914 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2916 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2917 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2918 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2920 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2921 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2922 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2924 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2925 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2926 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2927 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2928 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2929 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2930 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2931 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2932 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2933 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2934 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2936 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2937 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2938 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2940 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2941 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2942 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2944 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2945 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2946 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2948 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2949 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2950 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2952 2953 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2954 
        mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_16_BANK));

        mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                NUM_BANKS(ADDR_SURF_8_BANK));

        mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                NUM_BANKS(ADDR_SURF_4_BANK));

        mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
                NUM_BANKS(ADDR_SURF_4_BANK));

        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
            WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

        for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
            if (reg_offset != 7)
                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

        break;
    case CHIP_STONEY:
        modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                PIPE_CONFIG(ADDR_SURF_P2));
        modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
        modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
        modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

        mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
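        /*
         * Tile modes 7, 12, 17 and 23 and macrotile mode 7 were not
         * initialized above, so the write loops below skip those
         * register slots and leave them at their reset values.
         */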
        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
            if (reg_offset != 7 && reg_offset != 12 &&
                reg_offset != 17 && reg_offset != 23)
                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

        for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
            if (reg_offset != 7)
                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

        break;
    default:
        dev_warn(adev->dev,
             "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
             adev->asic_type);
        fallthrough;

    case CHIP_CARRIZO:
        modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
        modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
                PIPE_CONFIG(ADDR_SURF_P2));
        modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
        modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
        modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
        modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
        modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
                PIPE_CONFIG(ADDR_SURF_P2) |
                MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
                SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

        mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));
        mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
                NUM_BANKS(ADDR_SURF_16_BANK));
        mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
                BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
                MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
                NUM_BANKS(ADDR_SURF_8_BANK));

        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
            if (reg_offset != 7 && reg_offset != 12 &&
                reg_offset != 17 && reg_offset != 23)
                WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

        for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
            if (reg_offset != 7)
                WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

        break;
    }
}

static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
                  u32 se_num, u32 sh_num, u32 instance,
                  int xcc_id)
{
    u32 data;

    if (instance == 0xffffffff)
        data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
    else
        data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);

    if (se_num == 0xffffffff)
        data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
    else
        data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);

    if (sh_num == 0xffffffff)
        data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
    else
        data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);

    WREG32(mmGRBM_GFX_INDEX, data);
}

static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev,
                      u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id)
{
    vi_srbm_select(adev, me, pipe, q, vm);
}
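/*
 * One disable bit is reported per render backend of the currently
 * selected SE/SH.  E.g. with max_backends_per_se = 4 and
 * max_sh_per_se = 1 the mask below is 0xf, and a raw disable value of
 * 0x4 (RB2 harvested) yields an active bitmap of 0xb.  (Illustrative
 * numbers only, not tied to one ASIC.)
 */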
static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
    u32 data, mask;

    data = RREG32(mmCC_RB_BACKEND_DISABLE) |
        RREG32(mmGC_USER_RB_BACKEND_DISABLE);

    data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);

    mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
                     adev->gfx.config.max_sh_per_se);

    return (~data) & mask;
}

static void
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
{
    switch (adev->asic_type) {
    case CHIP_FIJI:
    case CHIP_VEGAM:
        *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
              RB_XSEL2(1) | PKR_MAP(2) |
              PKR_XSEL(1) | PKR_YSEL(1) |
              SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
        *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
               SE_PAIR_YSEL(2);
        break;
    case CHIP_TONGA:
    case CHIP_POLARIS10:
        *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
              SE_XSEL(1) | SE_YSEL(1);
        *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
               SE_PAIR_YSEL(2);
        break;
    case CHIP_TOPAZ:
    case CHIP_CARRIZO:
        *rconf |= RB_MAP_PKR0(2);
        *rconf1 |= 0x0;
        break;
    case CHIP_POLARIS11:
    case CHIP_POLARIS12:
        *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
              SE_XSEL(1) | SE_YSEL(1);
        *rconf1 |= 0x0;
        break;
    case CHIP_STONEY:
        *rconf |= 0x0;
        *rconf1 |= 0x0;
        break;
    default:
        DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
        break;
    }
}

static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
                    u32 raster_config, u32 raster_config_1,
                    unsigned rb_mask, unsigned num_rb)
{
    unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
    unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
    unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
    unsigned rb_per_se = num_rb / num_se;
    unsigned se_mask[4];
    unsigned se;

    se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
    se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
    se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
    se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

    WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
    WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
    WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

    if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
                 (!se_mask[2] && !se_mask[3]))) {
        raster_config_1 &= ~SE_PAIR_MAP_MASK;

        if (!se_mask[0] && !se_mask[1]) {
            raster_config_1 |=
                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
        } else {
            raster_config_1 |=
                SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
        }
    }

    for (se = 0; se < num_se; se++) {
        unsigned raster_config_se = raster_config;
        unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
        unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
        int idx = (se / 2) * 2;

        if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
            raster_config_se &= ~SE_MAP_MASK;

            if (!se_mask[idx]) {
                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
            } else {
                raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
            }
        }

        pkr0_mask &= rb_mask;
        pkr1_mask &= rb_mask;
        if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
            raster_config_se &= ~PKR_MAP_MASK;

            if (!pkr0_mask) {
                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
            } else {
                raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
            }
        }

        if (rb_per_se >= 2) {
            unsigned rb0_mask = 1 << (se * rb_per_se);
            unsigned rb1_mask = rb0_mask << 1;

            rb0_mask &= rb_mask;
            rb1_mask &= rb_mask;
            if (!rb0_mask || !rb1_mask) {
                raster_config_se &= ~RB_MAP_PKR0_MASK;

                if (!rb0_mask) {
                    raster_config_se |=
                        RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
                } else {
                    raster_config_se |=
                        RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
                }
            }

            if (rb_per_se > 2) {
                rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
                rb1_mask = rb0_mask << 1;
                rb0_mask &= rb_mask;
                rb1_mask &= rb_mask;
                if (!rb0_mask || !rb1_mask) {
                    raster_config_se &= ~RB_MAP_PKR1_MASK;

                    if (!rb0_mask) {
                        raster_config_se |=
                            RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
                    } else {
                        raster_config_se |=
                            RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
                    }
                }
            }
        }

        /* GRBM_GFX_INDEX has a different offset on VI */
        gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
        WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
        WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
    }

    /* GRBM_GFX_INDEX has a different offset on VI */
    gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
}

static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
    int i, j;
    u32 data;
    u32 raster_config = 0, raster_config_1 = 0;
    u32 active_rbs = 0;
    u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
                     adev->gfx.config.max_sh_per_se;
    unsigned num_rb_pipes;

    mutex_lock(&adev->grbm_idx_mutex);
    for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
        for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
            gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
            data = gfx_v8_0_get_rb_active_bitmap(adev);
            active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
                           rb_bitmap_width_per_sh);
        }
    }
    gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);

    adev->gfx.config.backend_enable_mask = active_rbs;
    adev->gfx.config.num_rbs = hweight32(active_rbs);

    num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
                 adev->gfx.config.max_shader_engines, 16);

    gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

    if (!adev->gfx.config.backend_enable_mask ||
        adev->gfx.config.num_rbs >= num_rb_pipes) {
        WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
        WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
    } else {
        gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
                            adev->gfx.config.backend_enable_mask,
                            num_rb_pipes);
    }

    /* cache the values for userspace */
    for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
        for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
            gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
            adev->gfx.config.rb_config[i][j].rb_backend_disable =
                RREG32(mmCC_RB_BACKEND_DISABLE);
            adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
                RREG32(mmGC_USER_RB_BACKEND_DISABLE);
            adev->gfx.config.rb_config[i][j].raster_config =
                RREG32(mmPA_SC_RASTER_CONFIG);
            adev->gfx.config.rb_config[i][j].raster_config_1 =
                RREG32(mmPA_SC_RASTER_CONFIG_1);
        }
    }
    gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
    mutex_unlock(&adev->grbm_idx_mutex);
}

#define DEFAULT_SH_MEM_BASES	(0x6000)

/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM apertures and memory-type registers of the
 * VMIDs reserved for compute (KFD).
 */
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
    int i;
    uint32_t sh_mem_config;
    uint32_t sh_mem_bases;

    /*
     * Configure apertures:
     * LDS:     0x60000000'00000000 - 0x60000001'00000000 (4GB)
     * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
     * GPUVM:   0x60010000'00000000 - 0x60020000'00000000 (1TB)
     */
    sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

    sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
            SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
            SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
            SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
            MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
            SH_MEM_CONFIG__PRIVATE_ATC_MASK;
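    /*
     * SH_MEM_BASES packs the private aperture base in its low 16 bits
     * and the shared aperture base in the high 16 bits; each value is
     * the top 16 bits of the 64-bit virtual address (note the matching
     * ">> 48" in gfx_v8_0_constants_init()), so 0x6000 places both
     * apertures at 0x6000'0000'0000'0000 as described above.
     */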
    mutex_lock(&adev->srbm_mutex);
    for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
        vi_srbm_select(adev, 0, 0, 0, i);
        /* CP and shaders */
        WREG32(mmSH_MEM_CONFIG, sh_mem_config);
        WREG32(mmSH_MEM_APE1_BASE, 1);
        WREG32(mmSH_MEM_APE1_LIMIT, 0);
        WREG32(mmSH_MEM_BASES, sh_mem_bases);
    }
    vi_srbm_select(adev, 0, 0, 0, 0);
    mutex_unlock(&adev->srbm_mutex);

    /*
     * Initialize all compute VMIDs to have no GDS, GWS, or OA
     * access.  These should be enabled by FW for the target VMIDs.
     */
    for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
        WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
        WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
        WREG32(amdgpu_gds_reg_offset[i].gws, 0);
        WREG32(amdgpu_gds_reg_offset[i].oa, 0);
    }
}

static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev)
{
    int vmid;

    /*
     * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
     * access.  Compute VMIDs should be enabled by FW for the target VMIDs,
     * and the driver can enable them for graphics.  VMID0 should maintain
     * access so that HWS firmware can save/restore entries.
     */
    for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
        WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0);
        WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0);
        WREG32(amdgpu_gds_reg_offset[vmid].gws, 0);
        WREG32(amdgpu_gds_reg_offset[vmid].oa, 0);
    }
}

static void gfx_v8_0_config_init(struct amdgpu_device *adev)
{
    switch (adev->asic_type) {
    default:
        adev->gfx.config.double_offchip_lds_buf = 1;
        break;
    case CHIP_CARRIZO:
    case CHIP_STONEY:
        adev->gfx.config.double_offchip_lds_buf = 0;
        break;
    }
}
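/*
 * One-time gfx constant state: the address config, tiling tables,
 * RB/raster setup and per-VMID SH_MEM registers are all programmed
 * here at init time.
 */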
static void gfx_v8_0_constants_init(struct amdgpu_device *adev)
{
    u32 tmp, sh_static_mem_cfg;
    int i;

    WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
    WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
    WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
    WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

    gfx_v8_0_tiling_mode_table_init(adev);
    gfx_v8_0_setup_rb(adev);
    gfx_v8_0_get_cu_info(adev);
    gfx_v8_0_config_init(adev);

    /* XXX SH_MEM regs */
    /* where to put LDS, scratch, GPUVM in FSA64 space */
    sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG,
                      SWIZZLE_ENABLE, 1);
    sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
                      ELEMENT_SIZE, 1);
    sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
                      INDEX_STRIDE, 3);
    WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

    mutex_lock(&adev->srbm_mutex);
    for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
        vi_srbm_select(adev, 0, 0, 0, i);
        /* CP and shaders */
        if (i == 0) {
            tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
            tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
            tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
                        SH_MEM_ALIGNMENT_MODE_UNALIGNED);
            WREG32(mmSH_MEM_CONFIG, tmp);
            WREG32(mmSH_MEM_BASES, 0);
        } else {
            tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
            tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
            tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
                        SH_MEM_ALIGNMENT_MODE_UNALIGNED);
            WREG32(mmSH_MEM_CONFIG, tmp);
            tmp = adev->gmc.shared_aperture_start >> 48;
            WREG32(mmSH_MEM_BASES, tmp);
        }

        WREG32(mmSH_MEM_APE1_BASE, 1);
        WREG32(mmSH_MEM_APE1_LIMIT, 0);
    }
    vi_srbm_select(adev, 0, 0, 0, 0);
    mutex_unlock(&adev->srbm_mutex);

    gfx_v8_0_init_compute_vmid(adev);
    gfx_v8_0_init_gds_vmid(adev);

    mutex_lock(&adev->grbm_idx_mutex);
    /*
     * make sure the following register writes are broadcast
     * to all the shaders
     */
    gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);

    WREG32(mmPA_SC_FIFO_SIZE,
           (adev->gfx.config.sc_prim_fifo_size_frontend <<
            PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
           (adev->gfx.config.sc_prim_fifo_size_backend <<
            PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
           (adev->gfx.config.sc_hiz_tile_fifo_size <<
            PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
           (adev->gfx.config.sc_earlyz_tile_fifo_size <<
            PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));

    tmp = RREG32(mmSPI_ARB_PRIORITY);
    tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
    tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
    tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
    tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
    WREG32(mmSPI_ARB_PRIORITY, tmp);

    mutex_unlock(&adev->grbm_idx_mutex);
}

static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
    u32 i, j, k;
    u32 mask;

    mutex_lock(&adev->grbm_idx_mutex);
    for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
        for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
            gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
            for (k = 0; k < adev->usec_timeout; k++) {
                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
                    break;
                udelay(1);
            }
            if (k == adev->usec_timeout) {
                gfx_v8_0_select_se_sh(adev, 0xffffffff,
                              0xffffffff, 0xffffffff, 0);
                mutex_unlock(&adev->grbm_idx_mutex);
                DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
                     i, j);
                return;
            }
        }
    }
    gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
    mutex_unlock(&adev->grbm_idx_mutex);

    mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
        RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
        RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
        RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
    for (k = 0; k < adev->usec_timeout; k++) {
        if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
            break;
        udelay(1);
    }
}

static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
                           bool enable)
{
    u32 tmp = RREG32(mmCP_INT_CNTL_RING0);

    tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
    tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
    tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
    tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);

    WREG32(mmCP_INT_CNTL_RING0, tmp);
}

static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
    adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
    /* csib */
    WREG32(mmRLC_CSIB_ADDR_HI,
           adev->gfx.rlc.clear_state_gpu_addr >> 32);
    WREG32(mmRLC_CSIB_ADDR_LO,
           adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
    WREG32(mmRLC_CSIB_LENGTH,
           adev->gfx.rlc.clear_state_size);
}
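/*
 * The RLC register-list blob starts with RLC_FormatDirectRegListLength
 * direct entries; the indirect entries that follow are variable length
 * and 0xFFFFFFFF-terminated.  As read by the parser below, every third
 * word of an entry references an index register, and that reference is
 * rewritten into a slot number in a small unique-index table so the
 * hardware index registers can be programmed once.
 */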
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
                    int ind_offset,
                    int list_size,
                    int *unique_indices,
                    int *indices_count,
                    int max_indices,
                    int *ind_start_offsets,
                    int *offset_count,
                    int max_offset)
{
    int indices;
    bool new_entry = true;

    for (; ind_offset < list_size; ind_offset++) {

        if (new_entry) {
            new_entry = false;
            ind_start_offsets[*offset_count] = ind_offset;
            *offset_count = *offset_count + 1;
            BUG_ON(*offset_count >= max_offset);
        }

        if (register_list_format[ind_offset] == 0xFFFFFFFF) {
            new_entry = true;
            continue;
        }

        ind_offset += 2;

        /* look for a matching index */
        for (indices = 0;
             indices < *indices_count;
             indices++) {
            if (unique_indices[indices] ==
                register_list_format[ind_offset])
                break;
        }

        if (indices >= *indices_count) {
            unique_indices[*indices_count] =
                register_list_format[ind_offset];
            indices = *indices_count;
            *indices_count = *indices_count + 1;
            BUG_ON(*indices_count >= max_indices);
        }

        register_list_format[ind_offset] = indices;
    }
}

static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
    int i, temp, data;
    int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
    int indices_count = 0;
    int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
    int offset_count = 0;

    int list_size;
    unsigned int *register_list_format =
        kmemdup(adev->gfx.rlc.register_list_format,
            adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
    if (!register_list_format)
        return -ENOMEM;

    gfx_v8_0_parse_ind_reg_list(register_list_format,
                    RLC_FormatDirectRegListLength,
                    adev->gfx.rlc.reg_list_format_size_bytes >> 2,
                    unique_indices,
                    &indices_count,
                    ARRAY_SIZE(unique_indices),
                    indirect_start_offsets,
                    &offset_count,
                    ARRAY_SIZE(indirect_start_offsets));

    /* save and restore list */
    WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

    WREG32(mmRLC_SRM_ARAM_ADDR, 0);
    for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
        WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

    /* indirect list */
    WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
    for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
        WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

    list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
    list_size = list_size >> 1;
    WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
    WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

    /* starting offsets */
    WREG32(mmRLC_GPM_SCRATCH_ADDR,
           adev->gfx.rlc.starting_offsets_start);
    for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
        WREG32(mmRLC_GPM_SCRATCH_DATA,
               indirect_start_offsets[i]);
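    /*
     * Each unique index collected above packs a register offset in its
     * low 18 bits and the index data above bit 20; these feed the
     * paired RLC_SRM_INDEX_CNTL_ADDR_n/DATA_n registers below.
     */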
    /* unique indices */
    temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
    data = mmRLC_SRM_INDEX_CNTL_DATA_0;
    for (i = 0; i < ARRAY_SIZE(unique_indices); i++) {
        if (unique_indices[i] != 0) {
            WREG32(temp + i, unique_indices[i] & 0x3FFFF);
            WREG32(data + i, unique_indices[i] >> 20);
        }
    }
    kfree(register_list_format);

    return 0;
}

static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
    WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}

static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
    uint32_t data;

    WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

    data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
    data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
    data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
    data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
    WREG32(mmRLC_PG_DELAY, data);

    WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
    WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
}

static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
                        bool enable)
{
    WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}

static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
                          bool enable)
{
    WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}

static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
    WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}

static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
{
    if ((adev->asic_type == CHIP_CARRIZO) ||
        (adev->asic_type == CHIP_STONEY)) {
        gfx_v8_0_init_csb(adev);
        gfx_v8_0_init_save_restore_list(adev);
        gfx_v8_0_enable_save_restore_machine(adev);
        WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
        gfx_v8_0_init_power_gating(adev);
        WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
    } else if ((adev->asic_type == CHIP_POLARIS11) ||
           (adev->asic_type == CHIP_POLARIS12) ||
           (adev->asic_type == CHIP_VEGAM)) {
        gfx_v8_0_init_csb(adev);
        gfx_v8_0_init_save_restore_list(adev);
        gfx_v8_0_enable_save_restore_machine(adev);
        gfx_v8_0_init_power_gating(adev);
    }
}

static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
    WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

    gfx_v8_0_enable_gui_idle_interrupt(adev, false);
    gfx_v8_0_wait_for_rlc_serdes(adev);
}

static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
    WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
    udelay(50);

    WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
    udelay(50);
}

static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
    WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

    /* APUs (Carrizo) enable the CP interrupt only after the CP is initialized */
    if (!(adev->flags & AMD_IS_APU))
        gfx_v8_0_enable_gui_idle_interrupt(adev, true);

    udelay(50);
}

static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
    if (amdgpu_sriov_vf(adev)) {
        gfx_v8_0_init_csb(adev);
        return 0;
    }

    adev->gfx.rlc.funcs->stop(adev);
    adev->gfx.rlc.funcs->reset(adev);
    gfx_v8_0_init_pg(adev);
    adev->gfx.rlc.funcs->start(adev);

    return 0;
}
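/*
 * Gfx CP bring-up: gfx_v8_0_cp_gfx_enable() releases or halts the
 * ME/PFP/CE micro-engines, and gfx_v8_0_cp_gfx_start() then feeds the
 * clear-state preamble built from vi_cs_data through gfx ring 0.
 */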
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
{
    u32 tmp = RREG32(mmCP_ME_CNTL);

    if (enable) {
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
    } else {
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
        tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
    }
    WREG32(mmCP_ME_CNTL, tmp);
    udelay(50);
}

static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
{
    u32 count = 0;
    const struct cs_section_def *sect = NULL;
    const struct cs_extent_def *ext = NULL;

    /* begin clear state */
    count += 2;
    /* context control state */
    count += 3;

    for (sect = vi_cs_data; sect->section != NULL; ++sect) {
        for (ext = sect->section; ext->extent != NULL; ++ext) {
            if (sect->id == SECT_CONTEXT)
                count += 2 + ext->reg_count;
            else
                return 0;
        }
    }
    /* pa_sc_raster_config/pa_sc_raster_config1 */
    count += 4;
    /* end clear state */
    count += 2;
    /* clear state */
    count += 2;

    return count;
}

static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
    struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
    const struct cs_section_def *sect = NULL;
    const struct cs_extent_def *ext = NULL;
    int r, i;

    /* init the CP */
    WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
    WREG32(mmCP_ENDIAN_SWAP, 0);
    WREG32(mmCP_DEVICE_ID, 1);

    gfx_v8_0_cp_gfx_enable(adev, true);

    r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
    if (r) {
        DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
        return r;
    }

    /* clear state buffer */
    amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
    amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

    amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
    amdgpu_ring_write(ring, 0x80000000);
    amdgpu_ring_write(ring, 0x80000000);

    for (sect = vi_cs_data; sect->section != NULL; ++sect) {
        for (ext = sect->section; ext->extent != NULL; ++ext) {
            if (sect->id == SECT_CONTEXT) {
                amdgpu_ring_write(ring,
                    PACKET3(PACKET3_SET_CONTEXT_REG,
                        ext->reg_count));
                amdgpu_ring_write(ring,
                    ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
                for (i = 0; i < ext->reg_count; i++)
                    amdgpu_ring_write(ring, ext->extent[i]);
            }
        }
    }

    amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
    amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
    amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config);
    amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1);

    amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
    amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

    amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
    amdgpu_ring_write(ring, 0);

    /* init the CE partitions */
    amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
    amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
    amdgpu_ring_write(ring, 0x8000);
    amdgpu_ring_write(ring, 0x8000);

    amdgpu_ring_commit(ring);

    return 0;
}
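/*
 * CP_RB_DOORBELL_RANGE_LOWER/UPPER describe the doorbell window
 * reserved for the gfx ring; only dGPUs program it (APUs return early
 * below, and Topaz has no gfx doorbells at all).
 */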
static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
    u32 tmp;

    /* no gfx doorbells on iceland */
    if (adev->asic_type == CHIP_TOPAZ)
        return;

    tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);

    if (ring->use_doorbell) {
        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                    DOORBELL_OFFSET, ring->doorbell_index);
        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                    DOORBELL_HIT, 0);
        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
                    DOORBELL_EN, 1);
    } else {
        tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
    }

    WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

    if (adev->flags & AMD_IS_APU)
        return;

    tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
                DOORBELL_RANGE_LOWER,
                adev->doorbell_index.gfx_ring0);
    WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

    WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
           CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
}

static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
    struct amdgpu_ring *ring;
    u32 tmp;
    u32 rb_bufsz;
    u64 rb_addr, rptr_addr, wptr_gpu_addr;

    /* Set the write pointer delay */
    WREG32(mmCP_RB_WPTR_DELAY, 0);

    /* set the RB to use vmid 0 */
    WREG32(mmCP_RB_VMID, 0);

    /* Set ring buffer size */
    ring = &adev->gfx.gfx_ring[0];
    rb_bufsz = order_base_2(ring->ring_size / 8);
    tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
    tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
    tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
    tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
    tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
    WREG32(mmCP_RB0_CNTL, tmp);

    /* Initialize the ring buffer's read and write pointers */
    WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
    ring->wptr = 0;
    WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr));

    /* set the wb address whether it's enabled or not */
    rptr_addr = ring->rptr_gpu_addr;
    WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
    WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

    wptr_gpu_addr = ring->wptr_gpu_addr;
    WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
    WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
    mdelay(1);
    WREG32(mmCP_RB0_CNTL, tmp);

    rb_addr = ring->gpu_addr >> 8;
    WREG32(mmCP_RB0_BASE, rb_addr);
    WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

    gfx_v8_0_set_cpg_door_bell(adev, ring);
    /* start the ring */
    amdgpu_ring_clear_ring(ring);
    gfx_v8_0_cp_gfx_start(adev);

    return 0;
}

static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
{
    if (enable) {
        WREG32(mmCP_MEC_CNTL, 0);
    } else {
        WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK |
                       CP_MEC_CNTL__MEC_ME2_HALT_MASK));
        adev->gfx.kiq[0].ring.sched.ready = false;
    }
    udelay(50);
}

/* KIQ functions */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
    uint32_t tmp;
    struct amdgpu_device *adev = ring->adev;

    /* tell the RLC which queue is the KIQ */
    tmp = RREG32(mmRLC_CP_SCHEDULERS);
    tmp &= 0xffffff00;
    tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
    WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80);
}
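/*
 * The KIQ (kernel interface queue) is a privileged compute queue the
 * driver uses to configure the other compute queues: SET_RESOURCES
 * hands it the bitmap of usable queue slots, then one MAP_QUEUES packet
 * per ring binds that ring's MQD and doorbell to a hardware queue slot.
 */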
static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
{
    struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
    uint64_t queue_mask = 0;
    int r, i;

    for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
        if (!test_bit(i, adev->gfx.mec_bitmap[0].queue_bitmap))
            continue;

        /* This situation may be hit in the future if a new HW
         * generation exposes more than 64 queues. If so, the
         * definition of queue_mask needs updating */
        if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
            DRM_ERROR("Invalid KCQ enabled: %d\n", i);
            break;
        }

        queue_mask |= (1ull << i);
    }

    r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8);
    if (r) {
        DRM_ERROR("Failed to lock KIQ (%d).\n", r);
        return r;
    }
    /* set resources */
    amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
    amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
    amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
    amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
    amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
    amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
    amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
    amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
    for (i = 0; i < adev->gfx.num_compute_rings; i++) {
        struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = ring->wptr_gpu_addr;

        /* map queues */
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1 */
        amdgpu_ring_write(kiq_ring,
                  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
                  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
    }

    amdgpu_ring_commit(kiq_ring);

    return 0;
}
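/*
 * A dequeue request asks the CP to drain the currently selected HQD
 * (hardware queue descriptor); the poll below gives it up to
 * adev->usec_timeout microseconds to clear the ACTIVE bit before the
 * queue pointer registers are reset.
 */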
static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
{
    int i, r = 0;

    if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
        WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
        for (i = 0; i < adev->usec_timeout; i++) {
            if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
                break;
            udelay(1);
        }
        if (i == adev->usec_timeout)
            r = -ETIMEDOUT;
    }
    WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
    WREG32(mmCP_HQD_PQ_RPTR, 0);
    WREG32(mmCP_HQD_PQ_WPTR, 0);

    return r;
}

static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *mqd)
{
    struct amdgpu_device *adev = ring->adev;

    if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
        if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
            mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
            mqd->cp_hqd_queue_priority =
                AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
        }
    }
}
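/*
 * The MQD (memory queue descriptor) is a CPU-built snapshot of every
 * HQD register for one queue; the KIQ (or gfx_v8_0_mqd_commit()) later
 * loads it into the hardware, so all queue state is staged here first.
 */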
static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
{
    struct amdgpu_device *adev = ring->adev;
    struct vi_mqd *mqd = ring->mqd_ptr;
    uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
    uint32_t tmp;

    mqd->header = 0xC0310800;
    mqd->compute_pipelinestat_enable = 0x00000001;
    mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
    mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
    mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
    mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
    mqd->compute_misc_reserved = 0x00000003;
    mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
            + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
    mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
            + offsetof(struct vi_mqd_allocation, dynamic_cu_mask));
    eop_base_addr = ring->eop_gpu_addr >> 8;
    mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
    mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

    /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
    tmp = RREG32(mmCP_HQD_EOP_CONTROL);
    tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
                (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));

    mqd->cp_hqd_eop_control = tmp;

    /* enable doorbell? */
    tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
                CP_HQD_PQ_DOORBELL_CONTROL,
                DOORBELL_EN,
                ring->use_doorbell ? 1 : 0);

    mqd->cp_hqd_pq_doorbell_control = tmp;

    /* set the pointer to the MQD */
    mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
    mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);

    /* set MQD vmid to 0 */
    tmp = RREG32(mmCP_MQD_CONTROL);
    tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
    mqd->cp_mqd_control = tmp;

    /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
    hqd_gpu_addr = ring->gpu_addr >> 8;
    mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
    mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

    /* set up the HQD, this is similar to CP_RB0_CNTL */
    tmp = RREG32(mmCP_HQD_PQ_CONTROL);
    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
                (order_base_2(ring->ring_size / 4) - 1));
    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
                (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
#ifdef __BIG_ENDIAN
    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
    tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
    mqd->cp_hqd_pq_control = tmp;

    /* set the wb address whether it's enabled or not */
    wb_gpu_addr = ring->rptr_gpu_addr;
    mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
    mqd->cp_hqd_pq_rptr_report_addr_hi =
        upper_32_bits(wb_gpu_addr) & 0xffff;

    /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
    wb_gpu_addr = ring->wptr_gpu_addr;
    mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
    mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

    tmp = 0;
    /* enable the doorbell if requested */
    if (ring->use_doorbell) {
        tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                    DOORBELL_OFFSET, ring->doorbell_index);

        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                    DOORBELL_EN, 1);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                    DOORBELL_SOURCE, 0);
        tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
                    DOORBELL_HIT, 0);
    }

    mqd->cp_hqd_pq_doorbell_control = tmp;

    /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
    ring->wptr = 0;
    mqd->cp_hqd_pq_wptr = ring->wptr;
    mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

    /* set the vmid for the queue */
    mqd->cp_hqd_vmid = 0;

    tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
    tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
    mqd->cp_hqd_persistent_state = tmp;

    /* set MTYPE */
    tmp = RREG32(mmCP_HQD_IB_CONTROL);
    tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
    tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
    mqd->cp_hqd_ib_control = tmp;

    tmp = RREG32(mmCP_HQD_IQ_TIMER);
    tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
    mqd->cp_hqd_iq_timer = tmp;

    tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
    tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
    mqd->cp_hqd_ctx_save_control = tmp;

    /* defaults */
    mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
    mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
    mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
    mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
    mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
    mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
    mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
    mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
    mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
    mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
    mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
    mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

    /* set static priority for a queue/ring */
    gfx_v8_0_mqd_set_priority(ring, mqd);
    mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

    /* the map_queues packet doesn't need to activate the queue,
     * so only the KIQ needs this field set.
     */
    if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
        mqd->cp_hqd_active = 1;

    return 0;
}
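/*
 * The HQD registers form one contiguous block from mmCP_MQD_BASE_ADDR
 * to mmCP_HQD_ERROR, so the commit below can copy the staged MQD into
 * hardware with simple indexed loops, activating the queue last.
 */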
RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);

	/* set static priority for a queue/ring */
	gfx_v8_0_mqd_set_priority(ring, mqd);
	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);

	/* The map_queues packet doesn't need to activate the queue,
	 * so only the KIQ needs to set this field.
	 */
	if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
		mqd->cp_hqd_active = 1;

	return 0;
}

static int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
			       struct vi_mqd *mqd)
{
	uint32_t mqd_reg;
	uint32_t *mqd_data;

	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */
	mqd_data = &mqd->cp_mqd_base_addr_lo;

	/* disable wptr polling */
	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);

	/* program all HQD registers */
	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
	 * This is safe since EOP RPTR==WPTR for any inactive HQD
	 * on ASICs that do not support context-save.
	 * EOP writes/reads can start anywhere in the ring.
	 */
	if (adev->asic_type != CHIP_TONGA) {
		WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
		WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
		WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
	}

	for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	/* activate the HQD */
	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);

	return 0;
}

static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct vi_mqd *mqd = ring->mqd_ptr;

	gfx_v8_0_kiq_setting(ring);

	if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
		/* reset MQD to a clean status */
		if (adev->gfx.kiq[0].mqd_backup)
			memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct vi_mqd_allocation));

		/* reset ring buffer */
		ring->wptr = 0;
		amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_commit(adev, mqd);
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	} else {
		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
			amdgpu_ring_clear_ring(ring);
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
		gfx_v8_0_mqd_init(ring);
		gfx_v8_0_mqd_commit(adev, mqd);
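		/* restore the default SRBM selection so later register
		 * accesses are no longer routed to this queue's aperture
		 */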
vi_srbm_select(adev, 0, 0, 0, 0); 4609 mutex_unlock(&adev->srbm_mutex); 4610 4611 if (adev->gfx.kiq[0].mqd_backup) 4612 memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct vi_mqd_allocation)); 4613 } 4614 4615 return 0; 4616 } 4617 4618 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4619 { 4620 struct amdgpu_device *adev = ring->adev; 4621 struct vi_mqd *mqd = ring->mqd_ptr; 4622 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4623 4624 if (!amdgpu_in_reset(adev) && !adev->in_suspend) { 4625 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4626 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4627 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4628 mutex_lock(&adev->srbm_mutex); 4629 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4630 gfx_v8_0_mqd_init(ring); 4631 vi_srbm_select(adev, 0, 0, 0, 0); 4632 mutex_unlock(&adev->srbm_mutex); 4633 4634 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4635 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4636 } else { 4637 /* restore MQD to a clean status */ 4638 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4639 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4640 /* reset ring buffer */ 4641 ring->wptr = 0; 4642 atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); 4643 amdgpu_ring_clear_ring(ring); 4644 } 4645 return 0; 4646 } 4647 4648 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4649 { 4650 if (adev->asic_type > CHIP_TONGA) { 4651 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2); 4652 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2); 4653 } 4654 /* enable doorbells */ 4655 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4656 } 4657 4658 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4659 { 4660 gfx_v8_0_kiq_init_queue(&adev->gfx.kiq[0].ring); 4661 return 0; 4662 } 4663 4664 static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) 4665 { 4666 int i, r; 4667 4668 gfx_v8_0_cp_compute_enable(adev, true); 4669 4670 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4671 r = gfx_v8_0_kcq_init_queue(&adev->gfx.compute_ring[i]); 4672 if (r) 4673 return r; 4674 } 4675 4676 gfx_v8_0_set_mec_doorbell_range(adev); 4677 4678 return gfx_v8_0_kiq_kcq_enable(adev); 4679 } 4680 4681 static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev) 4682 { 4683 int r, i; 4684 struct amdgpu_ring *ring; 4685 4686 /* collect all the ring_tests here, gfx, kiq, compute */ 4687 ring = &adev->gfx.gfx_ring[0]; 4688 r = amdgpu_ring_test_helper(ring); 4689 if (r) 4690 return r; 4691 4692 ring = &adev->gfx.kiq[0].ring; 4693 r = amdgpu_ring_test_helper(ring); 4694 if (r) 4695 return r; 4696 4697 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4698 ring = &adev->gfx.compute_ring[i]; 4699 amdgpu_ring_test_helper(ring); 4700 } 4701 4702 return 0; 4703 } 4704 4705 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 4706 { 4707 int r; 4708 4709 if (!(adev->flags & AMD_IS_APU)) 4710 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4711 4712 r = gfx_v8_0_kiq_resume(adev); 4713 if (r) 4714 return r; 4715 4716 r = gfx_v8_0_cp_gfx_resume(adev); 4717 if (r) 4718 return r; 4719 4720 r = gfx_v8_0_kcq_resume(adev); 4721 if (r) 4722 return r; 4723 4724 r = gfx_v8_0_cp_test_all_rings(adev); 4725 if (r) 4726 return r; 4727 4728 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4729 4730 return 0; 4731 } 4732 4733 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool 
enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}

static int gfx_v8_0_hw_init(struct amdgpu_ip_block *ip_block)
{
	int r;
	struct amdgpu_device *adev = ip_block->adev;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_constants_init(adev);

	r = adev->gfx.rlc.funcs->resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_resume(adev);

	return r;
}

static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev)
{
	int r, i;
	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;

	r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
	if (r)
		DRM_ERROR("Failed to lock KIQ (%d).\n", r);

	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
		amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
						PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
						PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
						PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
						PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
		amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
		amdgpu_ring_write(kiq_ring, 0);
	}
	/* Submit the unmap queue packets */
	amdgpu_ring_commit(kiq_ring);
	/*
	 * The ring test does a basic scratch-register write/readback check.
	 * Run it here to ensure the UNMAP_QUEUES packets submitted above
	 * have been processed before returning.
	 */
	r = amdgpu_ring_test_helper(kiq_ring);
	if (r)
		DRM_ERROR("KCQ disable failed\n");

	return r;
}

static bool gfx_v8_0_is_idle(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)
		|| RREG32(mmGRBM_STATUS2) != 0x8)
		return false;
	else
		return true;
}

static bool gfx_v8_0_rlc_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (RREG32(mmGRBM_STATUS2) != 0x8)
		return false;
	else
		return true;
}

static int gfx_v8_0_wait_for_rlc_idle(void *handle)
{
	unsigned int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_rlc_is_idle(handle))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}

static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
{
	unsigned int i;
	struct amdgpu_device *adev = ip_block->adev;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (gfx_v8_0_is_idle(ip_block))
			return 0;

		udelay(1);
	}
	return -ETIMEDOUT;
}

static int gfx_v8_0_hw_fini(struct amdgpu_ip_block *ip_block)
{
	struct amdgpu_device *adev = ip_block->adev;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);

	amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0);

	/* disable the KCQs so the CPC stops touching memory that may
	 * no longer be valid
	 */
	gfx_v8_0_kcq_disable(adev);

	if (amdgpu_sriov_vf(adev)) {
		pr_debug("For SRIOV client, 
shouldn't do anything.\n"); 4858 return 0; 4859 } 4860 4861 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 4862 if (!gfx_v8_0_wait_for_idle(ip_block)) 4863 gfx_v8_0_cp_enable(adev, false); 4864 else 4865 pr_err("cp is busy, skip halt cp\n"); 4866 if (!gfx_v8_0_wait_for_rlc_idle(adev)) 4867 adev->gfx.rlc.funcs->stop(adev); 4868 else 4869 pr_err("rlc is busy, skip halt rlc\n"); 4870 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 4871 4872 return 0; 4873 } 4874 4875 static int gfx_v8_0_suspend(struct amdgpu_ip_block *ip_block) 4876 { 4877 return gfx_v8_0_hw_fini(ip_block); 4878 } 4879 4880 static int gfx_v8_0_resume(struct amdgpu_ip_block *ip_block) 4881 { 4882 return gfx_v8_0_hw_init(ip_block); 4883 } 4884 4885 static bool gfx_v8_0_check_soft_reset(struct amdgpu_ip_block *ip_block) 4886 { 4887 struct amdgpu_device *adev = ip_block->adev; 4888 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 4889 u32 tmp; 4890 4891 /* GRBM_STATUS */ 4892 tmp = RREG32(mmGRBM_STATUS); 4893 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 4894 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 4895 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 4896 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 4897 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 4898 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 4899 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 4900 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4901 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 4902 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4903 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 4904 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 4905 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 4906 } 4907 4908 /* GRBM_STATUS2 */ 4909 tmp = RREG32(mmGRBM_STATUS2); 4910 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 4911 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 4912 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4913 4914 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 4915 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 4916 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 4917 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4918 SOFT_RESET_CPF, 1); 4919 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4920 SOFT_RESET_CPC, 1); 4921 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 4922 SOFT_RESET_CPG, 1); 4923 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 4924 SOFT_RESET_GRBM, 1); 4925 } 4926 4927 /* SRBM_STATUS */ 4928 tmp = RREG32(mmSRBM_STATUS); 4929 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 4930 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 4931 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 4932 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 4933 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 4934 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 4935 4936 if (grbm_soft_reset || srbm_soft_reset) { 4937 adev->gfx.grbm_soft_reset = grbm_soft_reset; 4938 adev->gfx.srbm_soft_reset = srbm_soft_reset; 4939 return true; 4940 } else { 4941 adev->gfx.grbm_soft_reset = 0; 4942 adev->gfx.srbm_soft_reset = 0; 4943 return false; 4944 } 4945 } 4946 4947 static int gfx_v8_0_pre_soft_reset(struct amdgpu_ip_block *ip_block) 4948 { 4949 struct amdgpu_device *adev = ip_block->adev; 4950 u32 grbm_soft_reset = 0; 4951 4952 if ((!adev->gfx.grbm_soft_reset) && 4953 (!adev->gfx.srbm_soft_reset)) 4954 return 0; 4955 4956 grbm_soft_reset = adev->gfx.grbm_soft_reset; 4957 4958 /* stop the rlc */ 4959 adev->gfx.rlc.funcs->stop(adev); 4960 4961 if 
(REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 4962 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 4963 /* Disable GFX parsing/prefetching */ 4964 gfx_v8_0_cp_gfx_enable(adev, false); 4965 4966 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 4967 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 4968 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 4969 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 4970 int i; 4971 4972 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4973 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4974 4975 mutex_lock(&adev->srbm_mutex); 4976 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4977 gfx_v8_0_deactivate_hqd(adev, 2); 4978 vi_srbm_select(adev, 0, 0, 0, 0); 4979 mutex_unlock(&adev->srbm_mutex); 4980 } 4981 /* Disable MEC parsing/prefetching */ 4982 gfx_v8_0_cp_compute_enable(adev, false); 4983 } 4984 4985 return 0; 4986 } 4987 4988 static int gfx_v8_0_soft_reset(struct amdgpu_ip_block *ip_block) 4989 { 4990 struct amdgpu_device *adev = ip_block->adev; 4991 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 4992 u32 tmp; 4993 4994 if ((!adev->gfx.grbm_soft_reset) && 4995 (!adev->gfx.srbm_soft_reset)) 4996 return 0; 4997 4998 grbm_soft_reset = adev->gfx.grbm_soft_reset; 4999 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5000 5001 if (grbm_soft_reset || srbm_soft_reset) { 5002 tmp = RREG32(mmGMCON_DEBUG); 5003 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5004 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5005 WREG32(mmGMCON_DEBUG, tmp); 5006 udelay(50); 5007 } 5008 5009 if (grbm_soft_reset) { 5010 tmp = RREG32(mmGRBM_SOFT_RESET); 5011 tmp |= grbm_soft_reset; 5012 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5013 WREG32(mmGRBM_SOFT_RESET, tmp); 5014 tmp = RREG32(mmGRBM_SOFT_RESET); 5015 5016 udelay(50); 5017 5018 tmp &= ~grbm_soft_reset; 5019 WREG32(mmGRBM_SOFT_RESET, tmp); 5020 tmp = RREG32(mmGRBM_SOFT_RESET); 5021 } 5022 5023 if (srbm_soft_reset) { 5024 tmp = RREG32(mmSRBM_SOFT_RESET); 5025 tmp |= srbm_soft_reset; 5026 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5027 WREG32(mmSRBM_SOFT_RESET, tmp); 5028 tmp = RREG32(mmSRBM_SOFT_RESET); 5029 5030 udelay(50); 5031 5032 tmp &= ~srbm_soft_reset; 5033 WREG32(mmSRBM_SOFT_RESET, tmp); 5034 tmp = RREG32(mmSRBM_SOFT_RESET); 5035 } 5036 5037 if (grbm_soft_reset || srbm_soft_reset) { 5038 tmp = RREG32(mmGMCON_DEBUG); 5039 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5040 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5041 WREG32(mmGMCON_DEBUG, tmp); 5042 } 5043 5044 /* Wait a little for things to settle down */ 5045 udelay(50); 5046 5047 return 0; 5048 } 5049 5050 static int gfx_v8_0_post_soft_reset(struct amdgpu_ip_block *ip_block) 5051 { 5052 struct amdgpu_device *adev = ip_block->adev; 5053 u32 grbm_soft_reset = 0; 5054 5055 if ((!adev->gfx.grbm_soft_reset) && 5056 (!adev->gfx.srbm_soft_reset)) 5057 return 0; 5058 5059 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5060 5061 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5062 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5063 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5064 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5065 int i; 5066 5067 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5068 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5069 5070 mutex_lock(&adev->srbm_mutex); 5071 
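			/* point SRBM at this queue so the HQD deactivate
			 * below targets the right ME/pipe/queue
			 */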
vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5072 gfx_v8_0_deactivate_hqd(adev, 2); 5073 vi_srbm_select(adev, 0, 0, 0, 0); 5074 mutex_unlock(&adev->srbm_mutex); 5075 } 5076 gfx_v8_0_kiq_resume(adev); 5077 gfx_v8_0_kcq_resume(adev); 5078 } 5079 5080 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5081 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5082 gfx_v8_0_cp_gfx_resume(adev); 5083 5084 gfx_v8_0_cp_test_all_rings(adev); 5085 5086 adev->gfx.rlc.funcs->start(adev); 5087 5088 return 0; 5089 } 5090 5091 /** 5092 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5093 * 5094 * @adev: amdgpu_device pointer 5095 * 5096 * Fetches a GPU clock counter snapshot. 5097 * Returns the 64 bit clock counter snapshot. 5098 */ 5099 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5100 { 5101 uint64_t clock; 5102 5103 mutex_lock(&adev->gfx.gpu_clock_mutex); 5104 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5105 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5106 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5107 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5108 return clock; 5109 } 5110 5111 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5112 uint32_t vmid, 5113 uint32_t gds_base, uint32_t gds_size, 5114 uint32_t gws_base, uint32_t gws_size, 5115 uint32_t oa_base, uint32_t oa_size) 5116 { 5117 /* GDS Base */ 5118 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5119 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5120 WRITE_DATA_DST_SEL(0))); 5121 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5122 amdgpu_ring_write(ring, 0); 5123 amdgpu_ring_write(ring, gds_base); 5124 5125 /* GDS Size */ 5126 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5127 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5128 WRITE_DATA_DST_SEL(0))); 5129 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5130 amdgpu_ring_write(ring, 0); 5131 amdgpu_ring_write(ring, gds_size); 5132 5133 /* GWS */ 5134 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5135 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5136 WRITE_DATA_DST_SEL(0))); 5137 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5138 amdgpu_ring_write(ring, 0); 5139 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5140 5141 /* OA */ 5142 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5143 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5144 WRITE_DATA_DST_SEL(0))); 5145 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5146 amdgpu_ring_write(ring, 0); 5147 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5148 } 5149 5150 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5151 { 5152 WREG32(mmSQ_IND_INDEX, 5153 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5154 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5155 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5156 (SQ_IND_INDEX__FORCE_READ_MASK)); 5157 return RREG32(mmSQ_IND_DATA); 5158 } 5159 5160 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5161 uint32_t wave, uint32_t thread, 5162 uint32_t regno, uint32_t num, uint32_t *out) 5163 { 5164 WREG32(mmSQ_IND_INDEX, 5165 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5166 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5167 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5168 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5169 (SQ_IND_INDEX__FORCE_READ_MASK) | 5170 
(SQ_IND_INDEX__AUTO_INCR_MASK)); 5171 while (num--) 5172 *(out++) = RREG32(mmSQ_IND_DATA); 5173 } 5174 5175 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5176 { 5177 /* type 0 wave data */ 5178 dst[(*no_fields)++] = 0; 5179 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5180 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5181 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5182 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5183 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5184 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5185 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5186 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5187 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5188 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5189 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5190 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5191 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5192 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5193 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5194 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5195 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5196 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5197 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); 5198 } 5199 5200 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, 5201 uint32_t wave, uint32_t start, 5202 uint32_t size, uint32_t *dst) 5203 { 5204 wave_read_regs( 5205 adev, simd, wave, 0, 5206 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5207 } 5208 5209 5210 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5211 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5212 .select_se_sh = &gfx_v8_0_select_se_sh, 5213 .read_wave_data = &gfx_v8_0_read_wave_data, 5214 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5215 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q 5216 }; 5217 5218 static int gfx_v8_0_early_init(struct amdgpu_ip_block *ip_block) 5219 { 5220 struct amdgpu_device *adev = ip_block->adev; 5221 5222 adev->gfx.xcc_mask = 1; 5223 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5224 adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), 5225 AMDGPU_MAX_COMPUTE_RINGS); 5226 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5227 gfx_v8_0_set_ring_funcs(adev); 5228 gfx_v8_0_set_irq_funcs(adev); 5229 gfx_v8_0_set_gds_init(adev); 5230 gfx_v8_0_set_rlc_funcs(adev); 5231 5232 return 0; 5233 } 5234 5235 static int gfx_v8_0_late_init(struct amdgpu_ip_block *ip_block) 5236 { 5237 struct amdgpu_device *adev = ip_block->adev; 5238 int r; 5239 5240 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5241 if (r) 5242 return r; 5243 5244 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5245 if (r) 5246 return r; 5247 5248 /* requires IBs so do in late init after IB pool is initialized */ 5249 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5250 if (r) 5251 return r; 5252 5253 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 5254 if (r) { 5255 
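		/* CP ECC errors are delivered through this interrupt;
		 * treat failure to enable it as fatal for late init
		 */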
DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); 5256 return r; 5257 } 5258 5259 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); 5260 if (r) { 5261 DRM_ERROR( 5262 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", 5263 r); 5264 return r; 5265 } 5266 5267 return 0; 5268 } 5269 5270 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5271 bool enable) 5272 { 5273 if ((adev->asic_type == CHIP_POLARIS11) || 5274 (adev->asic_type == CHIP_POLARIS12) || 5275 (adev->asic_type == CHIP_VEGAM)) 5276 /* Send msg to SMU via Powerplay */ 5277 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0); 5278 5279 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5280 } 5281 5282 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5283 bool enable) 5284 { 5285 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5286 } 5287 5288 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5289 bool enable) 5290 { 5291 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5292 } 5293 5294 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5295 bool enable) 5296 { 5297 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5298 } 5299 5300 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5301 bool enable) 5302 { 5303 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5304 5305 /* Read any GFX register to wake up GFX. */ 5306 if (!enable) 5307 RREG32(mmDB_RENDER_CONTROL); 5308 } 5309 5310 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5311 bool enable) 5312 { 5313 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5314 cz_enable_gfx_cg_power_gating(adev, true); 5315 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5316 cz_enable_gfx_pipeline_power_gating(adev, true); 5317 } else { 5318 cz_enable_gfx_cg_power_gating(adev, false); 5319 cz_enable_gfx_pipeline_power_gating(adev, false); 5320 } 5321 } 5322 5323 static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block, 5324 enum amd_powergating_state state) 5325 { 5326 struct amdgpu_device *adev = ip_block->adev; 5327 bool enable = (state == AMD_PG_STATE_GATE); 5328 5329 if (amdgpu_sriov_vf(adev)) 5330 return 0; 5331 5332 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5333 AMD_PG_SUPPORT_RLC_SMU_HS | 5334 AMD_PG_SUPPORT_CP | 5335 AMD_PG_SUPPORT_GFX_DMG)) 5336 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5337 switch (adev->asic_type) { 5338 case CHIP_CARRIZO: 5339 case CHIP_STONEY: 5340 5341 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5342 cz_enable_sck_slow_down_on_power_up(adev, true); 5343 cz_enable_sck_slow_down_on_power_down(adev, true); 5344 } else { 5345 cz_enable_sck_slow_down_on_power_up(adev, false); 5346 cz_enable_sck_slow_down_on_power_down(adev, false); 5347 } 5348 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5349 cz_enable_cp_power_gating(adev, true); 5350 else 5351 cz_enable_cp_power_gating(adev, false); 5352 5353 cz_update_gfx_cg_power_gating(adev, enable); 5354 5355 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5356 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5357 else 5358 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5359 5360 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5361 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5362 else 5363 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5364 break; 5365 case 
CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
		else
			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);

		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
		else
			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
		break;
	default:
		break;
	}
	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG |
				AMD_PG_SUPPORT_RLC_SMU_HS |
				AMD_PG_SUPPORT_CP |
				AMD_PG_SUPPORT_GFX_DMG))
		amdgpu_gfx_rlc_exit_safe_mode(adev, 0);
	return 0;
}

static void gfx_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags)
{
	struct amdgpu_device *adev = ip_block->adev;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_GFX_MGCG */
	data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
	if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_MGCG;

	/* AMD_CG_SUPPORT_GFX_CGCG */
	data = RREG32(mmRLC_CGCG_CGLS_CTRL);
	if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGCG;

	/* AMD_CG_SUPPORT_GFX_CGLS */
	if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CGLS;

	/* AMD_CG_SUPPORT_GFX_CGTS */
	data = RREG32(mmCGTS_SM_CTRL_REG);
	if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS;

	/* AMD_CG_SUPPORT_GFX_CGTS_LS */
	if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_GFX_CGTS_LS;

	/* AMD_CG_SUPPORT_GFX_RLC_LS */
	data = RREG32(mmRLC_MEM_SLP_CNTL);
	if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;

	/* AMD_CG_SUPPORT_GFX_CP_LS */
	data = RREG32(mmCP_MEM_SLP_CNTL);
	if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
		*flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
}

static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5467 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5468 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5469 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5470 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5471 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5472 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5473 5474 WREG32(mmRLC_SERDES_WR_CTRL, data); 5475 } 5476 5477 #define MSG_ENTER_RLC_SAFE_MODE 1 5478 #define MSG_EXIT_RLC_SAFE_MODE 0 5479 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5480 #define RLC_GPR_REG2__REQ__SHIFT 0 5481 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5482 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5483 5484 static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev) 5485 { 5486 uint32_t rlc_setting; 5487 5488 rlc_setting = RREG32(mmRLC_CNTL); 5489 if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5490 return false; 5491 5492 return true; 5493 } 5494 5495 static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) 5496 { 5497 uint32_t data; 5498 unsigned i; 5499 data = RREG32(mmRLC_CNTL); 5500 data |= RLC_SAFE_MODE__CMD_MASK; 5501 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5502 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5503 WREG32(mmRLC_SAFE_MODE, data); 5504 5505 /* wait for RLC_SAFE_MODE */ 5506 for (i = 0; i < adev->usec_timeout; i++) { 5507 if ((RREG32(mmRLC_GPM_STAT) & 5508 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5509 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5510 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5511 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5512 break; 5513 udelay(1); 5514 } 5515 for (i = 0; i < adev->usec_timeout; i++) { 5516 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5517 break; 5518 udelay(1); 5519 } 5520 } 5521 5522 static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) 5523 { 5524 uint32_t data; 5525 unsigned i; 5526 5527 data = RREG32(mmRLC_CNTL); 5528 data |= RLC_SAFE_MODE__CMD_MASK; 5529 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5530 WREG32(mmRLC_SAFE_MODE, data); 5531 5532 for (i = 0; i < adev->usec_timeout; i++) { 5533 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5534 break; 5535 udelay(1); 5536 } 5537 } 5538 5539 static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) 5540 { 5541 u32 data; 5542 5543 amdgpu_gfx_off_ctrl(adev, false); 5544 5545 if (amdgpu_sriov_is_pp_one_vf(adev)) 5546 data = RREG32_NO_KIQ(mmRLC_SPM_VMID); 5547 else 5548 data = RREG32(mmRLC_SPM_VMID); 5549 5550 data &= ~RLC_SPM_VMID__RLC_SPM_VMID_MASK; 5551 data |= (vmid & RLC_SPM_VMID__RLC_SPM_VMID_MASK) << RLC_SPM_VMID__RLC_SPM_VMID__SHIFT; 5552 5553 if (amdgpu_sriov_is_pp_one_vf(adev)) 5554 WREG32_NO_KIQ(mmRLC_SPM_VMID, data); 5555 else 5556 WREG32(mmRLC_SPM_VMID, data); 5557 5558 amdgpu_gfx_off_ctrl(adev, true); 5559 } 5560 5561 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5562 .is_rlc_enabled = gfx_v8_0_is_rlc_enabled, 5563 .set_safe_mode = gfx_v8_0_set_safe_mode, 5564 .unset_safe_mode = gfx_v8_0_unset_safe_mode, 5565 .init = gfx_v8_0_rlc_init, 5566 .get_csb_size = gfx_v8_0_get_csb_size, 5567 .get_csb_buffer = gfx_v8_0_get_csb_buffer, 5568 .get_cp_table_num = gfx_v8_0_cp_jump_table_num, 5569 .resume = gfx_v8_0_rlc_resume, 5570 .stop = gfx_v8_0_rlc_stop, 5571 .reset = gfx_v8_0_rlc_reset, 5572 .start = gfx_v8_0_rlc_start, 5573 .update_spm_vmid = gfx_v8_0_update_spm_vmid 5574 }; 5575 5576 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5577 bool enable) 5578 { 5579 
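	/* Enable path: turn on RLC/CP memory light sleep first, then clear
	 * the MGCG override bits and latch them through the RLC SERDES
	 * command interface; the disable path restores the overrides before
	 * switching light sleep back off.
	 */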
uint32_t temp, data; 5580 5581 /* It is disabled by HW by default */ 5582 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5583 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5584 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5585 /* 1 - RLC memory Light sleep */ 5586 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5587 5588 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5589 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5590 } 5591 5592 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5593 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5594 if (adev->flags & AMD_IS_APU) 5595 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5596 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5597 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5598 else 5599 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5600 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5601 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5602 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5603 5604 if (temp != data) 5605 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5606 5607 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5608 gfx_v8_0_wait_for_rlc_serdes(adev); 5609 5610 /* 5 - clear mgcg override */ 5611 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5612 5613 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5614 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5615 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5616 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5617 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5618 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5619 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5620 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5621 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5622 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5623 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5624 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5625 if (temp != data) 5626 WREG32(mmCGTS_SM_CTRL_REG, data); 5627 } 5628 udelay(50); 5629 5630 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5631 gfx_v8_0_wait_for_rlc_serdes(adev); 5632 } else { 5633 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5634 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5635 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5636 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5637 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5638 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5639 if (temp != data) 5640 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5641 5642 /* 2 - disable MGLS in RLC */ 5643 data = RREG32(mmRLC_MEM_SLP_CNTL); 5644 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5645 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5646 WREG32(mmRLC_MEM_SLP_CNTL, data); 5647 } 5648 5649 /* 3 - disable MGLS in CP */ 5650 data = RREG32(mmCP_MEM_SLP_CNTL); 5651 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5652 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5653 WREG32(mmCP_MEM_SLP_CNTL, data); 5654 } 5655 5656 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5657 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5658 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5659 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5660 if (temp != data) 5661 WREG32(mmCGTS_SM_CTRL_REG, data); 5662 5663 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5664 gfx_v8_0_wait_for_rlc_serdes(adev); 5665 5666 /* 6 - set mgcg override */ 5667 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5668 5669 udelay(50); 5670 5671 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5672 gfx_v8_0_wait_for_rlc_serdes(adev); 5673 } 5674 } 5675 5676 
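/*
 * Coarse-grain clock gating (CGCG/CGLS). As with the medium-grain path
 * above, override changes are latched through the RLC SERDES command
 * interface before the enable bits in RLC_CGCG_CGLS_CTRL are flipped.
 */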
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 1 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls */
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 - enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg; cgls should be disabled too.
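		 * Both enable bits live in RLC_CGCG_CGLS_CTRL, so the single
		 * register write below clears them together.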
*/ 5754 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 5755 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 5756 if (temp != data) 5757 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 5758 /* enable interrupts again for PG */ 5759 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5760 } 5761 5762 gfx_v8_0_wait_for_rlc_serdes(adev); 5763 } 5764 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 5765 bool enable) 5766 { 5767 amdgpu_gfx_rlc_enter_safe_mode(adev, 0); 5768 5769 if (enable) { 5770 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 5771 * === MGCG + MGLS + TS(CG/LS) === 5772 */ 5773 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5774 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5775 } else { 5776 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 5777 * === CGCG + CGLS === 5778 */ 5779 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 5780 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 5781 } 5782 5783 amdgpu_gfx_rlc_exit_safe_mode(adev, 0); 5784 return 0; 5785 } 5786 5787 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 5788 enum amd_clockgating_state state) 5789 { 5790 uint32_t msg_id, pp_state = 0; 5791 uint32_t pp_support_state = 0; 5792 5793 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 5794 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5795 pp_support_state = PP_STATE_SUPPORT_LS; 5796 pp_state = PP_STATE_LS; 5797 } 5798 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5799 pp_support_state |= PP_STATE_SUPPORT_CG; 5800 pp_state |= PP_STATE_CG; 5801 } 5802 if (state == AMD_CG_STATE_UNGATE) 5803 pp_state = 0; 5804 5805 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5806 PP_BLOCK_GFX_CG, 5807 pp_support_state, 5808 pp_state); 5809 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5810 } 5811 5812 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 5813 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5814 pp_support_state = PP_STATE_SUPPORT_LS; 5815 pp_state = PP_STATE_LS; 5816 } 5817 5818 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5819 pp_support_state |= PP_STATE_SUPPORT_CG; 5820 pp_state |= PP_STATE_CG; 5821 } 5822 5823 if (state == AMD_CG_STATE_UNGATE) 5824 pp_state = 0; 5825 5826 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5827 PP_BLOCK_GFX_MG, 5828 pp_support_state, 5829 pp_state); 5830 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5831 } 5832 5833 return 0; 5834 } 5835 5836 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 5837 enum amd_clockgating_state state) 5838 { 5839 5840 uint32_t msg_id, pp_state = 0; 5841 uint32_t pp_support_state = 0; 5842 5843 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 5844 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 5845 pp_support_state = PP_STATE_SUPPORT_LS; 5846 pp_state = PP_STATE_LS; 5847 } 5848 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 5849 pp_support_state |= PP_STATE_SUPPORT_CG; 5850 pp_state |= PP_STATE_CG; 5851 } 5852 if (state == AMD_CG_STATE_UNGATE) 5853 pp_state = 0; 5854 5855 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5856 PP_BLOCK_GFX_CG, 5857 pp_support_state, 5858 pp_state); 5859 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5860 } 5861 5862 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 5863 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 5864 pp_support_state = PP_STATE_SUPPORT_LS; 5865 pp_state = PP_STATE_LS; 5866 } 5867 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 5868 pp_support_state |= 
PP_STATE_SUPPORT_CG; 5869 pp_state |= PP_STATE_CG; 5870 } 5871 if (state == AMD_CG_STATE_UNGATE) 5872 pp_state = 0; 5873 5874 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5875 PP_BLOCK_GFX_3D, 5876 pp_support_state, 5877 pp_state); 5878 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5879 } 5880 5881 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 5882 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5883 pp_support_state = PP_STATE_SUPPORT_LS; 5884 pp_state = PP_STATE_LS; 5885 } 5886 5887 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 5888 pp_support_state |= PP_STATE_SUPPORT_CG; 5889 pp_state |= PP_STATE_CG; 5890 } 5891 5892 if (state == AMD_CG_STATE_UNGATE) 5893 pp_state = 0; 5894 5895 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5896 PP_BLOCK_GFX_MG, 5897 pp_support_state, 5898 pp_state); 5899 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5900 } 5901 5902 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 5903 pp_support_state = PP_STATE_SUPPORT_LS; 5904 5905 if (state == AMD_CG_STATE_UNGATE) 5906 pp_state = 0; 5907 else 5908 pp_state = PP_STATE_LS; 5909 5910 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5911 PP_BLOCK_GFX_RLC, 5912 pp_support_state, 5913 pp_state); 5914 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5915 } 5916 5917 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 5918 pp_support_state = PP_STATE_SUPPORT_LS; 5919 5920 if (state == AMD_CG_STATE_UNGATE) 5921 pp_state = 0; 5922 else 5923 pp_state = PP_STATE_LS; 5924 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 5925 PP_BLOCK_GFX_CP, 5926 pp_support_state, 5927 pp_state); 5928 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 5929 } 5930 5931 return 0; 5932 } 5933 5934 static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 5935 enum amd_clockgating_state state) 5936 { 5937 struct amdgpu_device *adev = ip_block->adev; 5938 5939 if (amdgpu_sriov_vf(adev)) 5940 return 0; 5941 5942 switch (adev->asic_type) { 5943 case CHIP_FIJI: 5944 case CHIP_CARRIZO: 5945 case CHIP_STONEY: 5946 gfx_v8_0_update_gfx_clock_gating(adev, 5947 state == AMD_CG_STATE_GATE); 5948 break; 5949 case CHIP_TONGA: 5950 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 5951 break; 5952 case CHIP_POLARIS10: 5953 case CHIP_POLARIS11: 5954 case CHIP_POLARIS12: 5955 case CHIP_VEGAM: 5956 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 5957 break; 5958 default: 5959 break; 5960 } 5961 return 0; 5962 } 5963 5964 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 5965 { 5966 return *ring->rptr_cpu_addr; 5967 } 5968 5969 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 5970 { 5971 struct amdgpu_device *adev = ring->adev; 5972 5973 if (ring->use_doorbell) 5974 /* XXX check if swapping is necessary on BE */ 5975 return *ring->wptr_cpu_addr; 5976 else 5977 return RREG32(mmCP_RB0_WPTR); 5978 } 5979 5980 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 5981 { 5982 struct amdgpu_device *adev = ring->adev; 5983 5984 if (ring->use_doorbell) { 5985 /* XXX check if swapping is necessary on BE */ 5986 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); 5987 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 5988 } else { 5989 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 5990 (void)RREG32(mmCP_RB0_WPTR); 5991 } 5992 } 5993 5994 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 5995 { 5996 u32 ref_and_mask, reg_mem_engine; 5997 5998 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 5999 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6000 switch (ring->me) { 6001 case 1: 6002 
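			/* each compute pipe has its own flush-done bit:
			 * MEC1 starts at CP2, MEC2 at CP6; the CPn bits are
			 * consecutive, so shifting by pipe selects the
			 * right one
			 */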
ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6003 break; 6004 case 2: 6005 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6006 break; 6007 default: 6008 return; 6009 } 6010 reg_mem_engine = 0; 6011 } else { 6012 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6013 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6014 } 6015 6016 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6017 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6018 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6019 reg_mem_engine)); 6020 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6021 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6022 amdgpu_ring_write(ring, ref_and_mask); 6023 amdgpu_ring_write(ring, ref_and_mask); 6024 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6025 } 6026 6027 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6028 { 6029 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6030 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6031 EVENT_INDEX(4)); 6032 6033 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6034 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6035 EVENT_INDEX(0)); 6036 } 6037 6038 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6039 struct amdgpu_job *job, 6040 struct amdgpu_ib *ib, 6041 uint32_t flags) 6042 { 6043 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 6044 u32 header, control = 0; 6045 6046 if (ib->flags & AMDGPU_IB_FLAG_CE) 6047 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6048 else 6049 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6050 6051 control |= ib->length_dw | (vmid << 24); 6052 6053 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 6054 control |= INDIRECT_BUFFER_PRE_ENB(1); 6055 6056 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) 6057 gfx_v8_0_ring_emit_de_meta(ring); 6058 } 6059 6060 amdgpu_ring_write(ring, header); 6061 amdgpu_ring_write(ring, 6062 #ifdef __BIG_ENDIAN 6063 (2 << 0) | 6064 #endif 6065 (ib->gpu_addr & 0xFFFFFFFC)); 6066 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6067 amdgpu_ring_write(ring, control); 6068 } 6069 6070 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6071 struct amdgpu_job *job, 6072 struct amdgpu_ib *ib, 6073 uint32_t flags) 6074 { 6075 unsigned vmid = AMDGPU_JOB_GET_VMID(job); 6076 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 6077 6078 /* Currently, there is a high possibility to get wave ID mismatch 6079 * between ME and GDS, leading to a hw deadlock, because ME generates 6080 * different wave IDs than the GDS expects. This situation happens 6081 * randomly when at least 5 compute pipes use GDS ordered append. 6082 * The wave IDs generated by ME are also wrong after suspend/resume. 6083 * Those are probably bugs somewhere else in the kernel driver. 6084 * 6085 * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and 6086 * GDS to 0 for this ring (me/pipe). 
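	 *
	 * The SET_CONFIG_REG write emitted below performs that reset
	 * whenever the IB carries AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID.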
6087 */ 6088 if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) { 6089 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); 6090 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START); 6091 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id); 6092 } 6093 6094 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6095 amdgpu_ring_write(ring, 6096 #ifdef __BIG_ENDIAN 6097 (2 << 0) | 6098 #endif 6099 (ib->gpu_addr & 0xFFFFFFFC)); 6100 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6101 amdgpu_ring_write(ring, control); 6102 } 6103 6104 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6105 u64 seq, unsigned flags) 6106 { 6107 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6108 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6109 bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; 6110 6111 /* Workaround for cache flush problems. First send a dummy EOP 6112 * event down the pipe with seq one below. 6113 */ 6114 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6115 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6116 EOP_TC_ACTION_EN | 6117 EOP_TC_WB_ACTION_EN | 6118 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6119 EVENT_INDEX(5))); 6120 amdgpu_ring_write(ring, addr & 0xfffffffc); 6121 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6122 DATA_SEL(1) | INT_SEL(0)); 6123 amdgpu_ring_write(ring, lower_32_bits(seq - 1)); 6124 amdgpu_ring_write(ring, upper_32_bits(seq - 1)); 6125 6126 /* Then send the real EOP event down the pipe: 6127 * EVENT_WRITE_EOP - flush caches, send int */ 6128 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6129 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6130 EOP_TC_ACTION_EN | 6131 EOP_TC_WB_ACTION_EN | 6132 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6133 EVENT_INDEX(5) | 6134 (exec ? EOP_EXEC : 0))); 6135 amdgpu_ring_write(ring, addr & 0xfffffffc); 6136 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6137 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0)); 6138 amdgpu_ring_write(ring, lower_32_bits(seq)); 6139 amdgpu_ring_write(ring, upper_32_bits(seq)); 6140 6141 } 6142 6143 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6144 { 6145 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6146 uint32_t seq = ring->fence_drv.sync_seq; 6147 uint64_t addr = ring->fence_drv.gpu_addr; 6148 6149 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6150 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 6151 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 6152 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 6153 amdgpu_ring_write(ring, addr & 0xfffffffc); 6154 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 6155 amdgpu_ring_write(ring, seq); 6156 amdgpu_ring_write(ring, 0xffffffff); 6157 amdgpu_ring_write(ring, 4); /* poll interval */ 6158 } 6159 6160 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6161 unsigned vmid, uint64_t pd_addr) 6162 { 6163 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6164 6165 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 6166 6167 /* wait for the invalidate to complete */ 6168 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6169 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 6170 WAIT_REG_MEM_FUNCTION(0) | /* always */ 6171 WAIT_REG_MEM_ENGINE(0))); /* me */ 6172 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6173 amdgpu_ring_write(ring, 0); 6174 amdgpu_ring_write(ring, 0); /* ref */ 6175 amdgpu_ring_write(ring, 0); /* mask */ 6176 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6177 6178 /* compute doesn't have PFP */ 6179 if (usepfp) { 6180 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 6181 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6182 amdgpu_ring_write(ring, 0x0); 6183 } 6184 } 6185 6186 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 6187 { 6188 return *ring->wptr_cpu_addr; 6189 } 6190 6191 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 6192 { 6193 struct amdgpu_device *adev = ring->adev; 6194 6195 /* XXX check if swapping is necessary on BE */ 6196 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); 6197 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6198 } 6199 6200 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 6201 u64 addr, u64 seq, 6202 unsigned flags) 6203 { 6204 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6205 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6206 6207 /* RELEASE_MEM - flush caches, send int */ 6208 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 6209 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6210 EOP_TC_ACTION_EN | 6211 EOP_TC_WB_ACTION_EN | 6212 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6213 EVENT_INDEX(5))); 6214 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 
2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}

static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}

static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still set load_ce_ram if a preamble is presented for the
		 * first time, even though no context switch happens.
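		 * Each of these load_* flags lands in dw2 of the
		 * CONTEXT_CONTROL packet written below.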
static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
{
	uint32_t dw2 = 0;

	if (amdgpu_sriov_vf(ring->adev))
		gfx_v8_0_ring_emit_ce_meta(ring);

	dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
		gfx_v8_0_ring_emit_vgt_flush(ring);
		/* set load_global_config & load_global_uconfig */
		dw2 |= 0x8001;
		/* set load_cs_sh_regs */
		dw2 |= 0x01000000;
		/* set load_per_context_state & load_gfx_sh_regs for GFX */
		dw2 |= 0x10002;

		/* set load_ce_ram if preamble presented */
		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
			dw2 |= 0x10000000;
	} else {
		/* still load_ce_ram if this is the first time a preamble
		 * is presented, even though no context switch happens.
		 */
		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
			dw2 |= 0x10000000;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, dw2);
	amdgpu_ring_write(ring, 0);
}

static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
						  uint64_t addr)
{
	unsigned ret;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	/* discard following DWs if *cond_exec_gpu_addr==0 */
	amdgpu_ring_write(ring, 0);
	ret = ring->wptr & ring->buf_mask;
	/* patch dummy value later */
	amdgpu_ring_write(ring, 0);
	return ret;
}

static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t reg_val_offs)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
	amdgpu_ring_write(ring, 0 |		/* src: register */
				(5 << 8) |	/* dst: memory */
				(1 << 20));	/* write confirm */
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
	amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
				reg_val_offs * 4));
}

static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
				    uint32_t val)
{
	uint32_t cmd;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
		break;
	case AMDGPU_RING_TYPE_KIQ:
		cmd = 1 << 16; /* no inc addr */
		break;
	default:
		cmd = WR_CONFIRM;
		break;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, cmd);
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, val);
}

static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t value = 0;

	value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
	WREG32(mmSQ_CMD, value);
}
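
/*
 * EOP (end of pipe) interrupt state is programmed per engine: the gfx
 * ring uses the TIME_STAMP_INT_ENABLE bit of CP_INT_CNTL_RING0, while
 * each MEC compute pipe has its own CP_ME<m>_PIPE<p>_INT_CNTL register
 * with the same bit layout (which is why the compute helper below
 * reuses the RING0 field mask).
 */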
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}

static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only the first MEC. That's why this function only
	 * handles the setting of interrupts for this specific MEC. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		case 1:
			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
			break;
		case 2:
			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
			break;
		case 3:
			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}

static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}

static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
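
/*
 * CP ECC error reporting has an enable bit in every CP/CPC interrupt
 * control register, so the requested state is mirrored across the gfx
 * rings (CP_INT_CNTL*), the compute front end (CPC_INT_CNTL) and all
 * eight MEC pipes (CP_ME<m>_PIPE<p>_INT_CNTL).
 */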
static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev,
					 struct amdgpu_irq_src *source,
					 unsigned int type,
					 enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 0;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 1;
		break;

	default:
		return -EINVAL;
	}

	WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag);
	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);
	WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE,
		     enable_flag);

	return 0;
}

static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     unsigned int type,
				     enum amdgpu_interrupt_state state)
{
	int enable_flag;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		enable_flag = 1;
		break;

	case AMDGPU_IRQ_STATE_ENABLE:
		enable_flag = 0;
		break;

	default:
		return -EINVAL;
	}

	/* the STALL bit gates SQ interrupt message delivery, hence the
	 * inverted enable_flag above
	 */
	WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL,
		     enable_flag);

	return 0;
}
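
/*
 * The IH entry's ring_id used by the CP interrupt handlers below packs
 * the interrupt source as: pipe in bits [1:0], me in bits [3:2] and
 * queue in bits [6:4].
 */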
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupts are supported for MEC starting
			 * from VI, but they can only be enabled/disabled per
			 * pipe, not per queue.
			 */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}

static void gfx_v8_0_fault(struct amdgpu_device *adev,
			   struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;
	int i;

	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			if (ring->me == me_id && ring->pipe == pipe_id &&
			    ring->queue == queue_id)
				drm_sched_fault(&ring->sched);
		}
		break;
	}
}

static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}

static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	gfx_v8_0_fault(adev, entry);
	return 0;
}

static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev,
				     struct amdgpu_irq_src *source,
				     struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("CP EDC/ECC error detected.\n");
	return 0;
}
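
/*
 * SQ interrupt words carry an ENCODING field: 0 is the "auto" general
 * purpose word, 1 a per-wave instruction interrupt and 2 a per-wave
 * EDC/ECC error; encodings 1 and 2 share the WAVE word layout.
 */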
static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data,
				  bool from_wq)
{
	u32 enc, se_id, sh_id, cu_id;
	char type[20];
	int sq_edc_source = -1;

	enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING);
	se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID);

	switch (enc) {
	case 0:
		DRM_INFO("SQ general purpose intr detected: "
			 "se_id %d, immed_overflow %d, host_reg_overflow %d, "
			 "host_cmd_overflow %d, cmd_timestamp %d, "
			 "reg_timestamp %d, thread_trace_buff_full %d, "
			 "wlt %d, thread_trace %d.\n",
			 se_id,
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT),
			 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE)
			 );
		break;
	case 1:
	case 2:
		cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID);
		sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID);

		/*
		 * This function can be called either directly from the ISR
		 * or from the BH work item; only in the latter case can we
		 * safely access the SQ_EDC_INFO instance.
		 */
		if (from_wq) {
			mutex_lock(&adev->grbm_idx_mutex);
			gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);

			sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);

			gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
			mutex_unlock(&adev->grbm_idx_mutex);
		}

		if (enc == 1)
			sprintf(type, "instruction intr");
		else
			sprintf(type, "EDC/ECC error");

		DRM_INFO(
			"SQ %s detected: "
			"se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d, "
			"trap %s, sq_edc_info.source %s.\n",
			type, se_id, sh_id, cu_id,
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID),
			REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false",
			(sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable"
			);
		break;
	default:
		DRM_ERROR("SQ invalid encoding type.\n");
	}
}

static void gfx_v8_0_sq_irq_work_func(struct work_struct *work)
{
	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work);
	struct sq_work *sq_work = container_of(work, struct sq_work, work);

	gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data, true);
}

static int gfx_v8_0_sq_irq(struct amdgpu_device *adev,
			   struct amdgpu_irq_src *source,
			   struct amdgpu_iv_entry *entry)
{
	unsigned ih_data = entry->src_data[0];

	/*
	 * Try to submit work so SQ_EDC_INFO can be accessed from
	 * the BH. If the previous work submission hasn't finished yet
	 * just print whatever info is possible directly from the ISR.
	 */
	if (work_pending(&adev->gfx.sq_work.work)) {
		gfx_v8_0_parse_sq_irq(adev, ih_data, false);
	} else {
		adev->gfx.sq_work.ih_data = ih_data;
		schedule_work(&adev->gfx.sq_work.work);
	}

	return 0;
}

static void gfx_v8_0_emit_mem_sync(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA |
			  PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
}

static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
	amdgpu_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA |
			  PACKET3_TC_WB_ACTION_ENA); /* CP_COHER_CNTL */
	amdgpu_ring_write(ring, 0xffffffff);	/* CP_COHER_SIZE */
	amdgpu_ring_write(ring, 0xff);		/* CP_COHER_SIZE_HI */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE */
	amdgpu_ring_write(ring, 0);		/* CP_COHER_BASE_HI */
	amdgpu_ring_write(ring, 0x0000000A);	/* poll interval */
}
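
/*
 * Wave limiting for compute QoS: while a high priority compute queue is
 * active, the SPI_WCL_PIPE_PERCENT_* registers programmed below throttle
 * the share of new waves that the gfx pipe and the other compute pipes
 * may launch, leaving most of the shader array to the high priority
 * queue.
 */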
/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT	0x0000007f
static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
					uint32_t pipe, bool enable)
{
	uint32_t val;
	uint32_t wcl_cs_reg;

	val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT;

	switch (pipe) {
	case 0:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0;
		break;
	case 1:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1;
		break;
	case 2:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2;
		break;
	case 3:
		wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3;
		break;
	default:
		DRM_DEBUG("invalid pipe %d\n", pipe);
		return;
	}

	amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
}

#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT	0x07ffffff
static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t val;
	int i;

	/* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
	 * limit the number of gfx waves. Writing 0x1f (5 of the 7 bits set)
	 * makes sure gfx only gets around 25% of the gpu resources.
	 */
	val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
	amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);

	/* Restrict waves for normal/low priority compute queues as well
	 * to get the best QoS for high priority compute jobs.
	 *
	 * amdgpu controls only the 1st ME (CS pipes 0-3).
	 */
	for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
		if (i != ring->pipe)
			gfx_v8_0_emit_wave_limit_cs(ring, i, enable);
	}
}

static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
	.get_clockgating_state = gfx_v8_0_get_clockgating_state,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size = /* maximum 215dw if count 16 IBs in */
		5 + /* COND_EXEC */
		7 + /* PIPELINE_SYNC */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
		12 + /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		       the first COND_EXEC jumps to the place just
		       prior to this double SWITCH_BUFFER */
		5 + /* COND_EXEC */
		7 + /* HDP_flush */
		4 + /* VGT_flush */
		14 + /* CE_META */
		31 + /* DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		12 + 12 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		5, /* SURFACE_SYNC */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
		7 + /* gfx_v8_0_emit_mem_sync_compute */
		5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
		15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
	.soft_recovery = gfx_v8_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
	.emit_wave_limit = gfx_v8_0_emit_wave_limit,
};

static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = false,
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.test_ring = gfx_v8_0_ring_test_ring,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_rreg = gfx_v8_0_ring_emit_rreg,
	.emit_wreg = gfx_v8_0_ring_emit_wreg,
};
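
/*
 * Note that the KIQ (kernel interface queue) table above carries no
 * emit_ib/emit_vm_flush hooks: the KIQ never runs user submissions and
 * is only used by the driver itself, e.g. for register access through
 * emit_rreg/emit_wreg.
 */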
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.kiq[0].ring.funcs = &gfx_v8_0_ring_funcs_kiq;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}

static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = {
	.set = gfx_v8_0_set_cp_ecc_int_state,
	.process = gfx_v8_0_cp_ecc_error_irq,
};

static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = {
	.set = gfx_v8_0_set_sq_int_state,
	.process = gfx_v8_0_sq_irq,
};

static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;

	adev->gfx.cp_ecc_error_irq.num_types = 1;
	adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs;

	adev->gfx.sq_irq.num_types = 1;
	adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs;
}

static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}

static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init asic gds info */
	adev->gds.gds_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws_size = 64;
	adev->gds.oa_size = 16;
	adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
}

static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
						 u32 bitmap)
{
	u32 data;

	if (!bitmap)
		return;

	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
	u32 data, mask;

	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);

	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
}
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];
	u32 ao_cu_num;

	memset(cu_info, 0, sizeof(*cu_info));

	if (adev->flags & AMD_IS_APU)
		ao_cu_num = 2;
	else
		ao_cu_num = adev->gfx.config.max_cu_per_sh;

	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[0][i][j] = bitmap;

			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
				if (bitmap & mask) {
					if (counter < ao_cu_num)
						ao_bitmap |= mask;
					counter++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			if (i < 2 && j < 2)
				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	cu_info->simd_per_cu = NUM_SIMD_PER_CU;
	cu_info->max_waves_per_simd = 10;
	cu_info->max_scratch_slots_per_cu = 32;
	cu_info->wave_front_size = 64;
	cu_info->lds_size = 64;
}

const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};

static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
{
	uint64_t ce_payload_addr;
	int cnt_ce;
	union {
		struct vi_ce_ib_state regular;
		struct vi_ce_ib_state_chained_ib chained;
	} ce_payload = {};

	if (ring->adev->virt.chained_ib_support) {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
		cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
	} else {
		ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
			offsetof(struct vi_gfx_meta_data, ce_payload);
		cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2);
}
static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
{
	uint64_t de_payload_addr, gds_addr, csa_addr;
	int cnt_de;
	union {
		struct vi_de_ib_state regular;
		struct vi_de_ib_state_chained_ib chained;
	} de_payload = {};

	csa_addr = amdgpu_csa_vaddr(ring->adev);
	gds_addr = csa_addr + 4096;
	if (ring->adev->virt.chained_ib_support) {
		de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload);
		cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2;
	} else {
		de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr);
		de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr);
		de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload);
		cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2;
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
				 WRITE_DATA_DST_SEL(8) |
				 WR_CONFIRM) |
				 WRITE_DATA_CACHE_POLICY(0));
	amdgpu_ring_write(ring, lower_32_bits(de_payload_addr));
	amdgpu_ring_write(ring, upper_32_bits(de_payload_addr));
	amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2);
}