1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 #include <linux/kernel.h> 24 #include <linux/firmware.h> 25 #include <drm/drmP.h> 26 #include "amdgpu.h" 27 #include "amdgpu_gfx.h" 28 #include "vi.h" 29 #include "vi_structs.h" 30 #include "vid.h" 31 #include "amdgpu_ucode.h" 32 #include "amdgpu_atombios.h" 33 #include "atombios_i2c.h" 34 #include "clearstate_vi.h" 35 36 #include "gmc/gmc_8_2_d.h" 37 #include "gmc/gmc_8_2_sh_mask.h" 38 39 #include "oss/oss_3_0_d.h" 40 #include "oss/oss_3_0_sh_mask.h" 41 42 #include "bif/bif_5_0_d.h" 43 #include "bif/bif_5_0_sh_mask.h" 44 #include "gca/gfx_8_0_d.h" 45 #include "gca/gfx_8_0_enum.h" 46 #include "gca/gfx_8_0_sh_mask.h" 47 #include "gca/gfx_8_0_enum.h" 48 49 #include "dce/dce_10_0_d.h" 50 #include "dce/dce_10_0_sh_mask.h" 51 52 #include "smu/smu_7_1_3_d.h" 53 54 #include "ivsrcid/ivsrcid_vislands30.h" 55 56 #define GFX8_NUM_GFX_RINGS 1 57 #define GFX8_MEC_HPD_SIZE 2048 58 59 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 60 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 61 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002 62 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003 63 64 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT) 65 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT) 66 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT) 67 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT) 68 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT) 69 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT) 70 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT) 71 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT) 72 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT) 73 74 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L 75 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L 76 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L 77 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L 78 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L 79 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L 80 81 /* BPM SERDES CMD */ 82 #define SET_BPM_SERDES_CMD 1 83 #define CLE_BPM_SERDES_CMD 0 84 85 /* BPM Register Address*/ 86 enum { 87 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */ 88 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */ 89 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */ 90 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */ 91 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */ 92 BPM_REG_FGCG_MAX 93 }; 94 95 #define RLC_FormatDirectRegListLength 14 96 97 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin"); 98 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin"); 99 MODULE_FIRMWARE("amdgpu/carrizo_me.bin"); 100 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin"); 101 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin"); 102 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin"); 103 104 MODULE_FIRMWARE("amdgpu/stoney_ce.bin"); 105 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin"); 106 MODULE_FIRMWARE("amdgpu/stoney_me.bin"); 107 MODULE_FIRMWARE("amdgpu/stoney_mec.bin"); 108 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin"); 109 110 MODULE_FIRMWARE("amdgpu/tonga_ce.bin"); 111 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin"); 112 MODULE_FIRMWARE("amdgpu/tonga_me.bin"); 113 MODULE_FIRMWARE("amdgpu/tonga_mec.bin"); 114 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin"); 115 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin"); 116 117 MODULE_FIRMWARE("amdgpu/topaz_ce.bin"); 118 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin"); 119 MODULE_FIRMWARE("amdgpu/topaz_me.bin"); 120 MODULE_FIRMWARE("amdgpu/topaz_mec.bin"); 121 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin"); 122 123 MODULE_FIRMWARE("amdgpu/fiji_ce.bin"); 124 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin"); 125 MODULE_FIRMWARE("amdgpu/fiji_me.bin"); 126 MODULE_FIRMWARE("amdgpu/fiji_mec.bin"); 127 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin"); 128 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin"); 129 130 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin"); 131 MODULE_FIRMWARE("amdgpu/polaris10_ce_2.bin"); 132 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin"); 133 MODULE_FIRMWARE("amdgpu/polaris10_pfp_2.bin"); 134 MODULE_FIRMWARE("amdgpu/polaris10_me.bin"); 135 MODULE_FIRMWARE("amdgpu/polaris10_me_2.bin"); 136 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin"); 137 MODULE_FIRMWARE("amdgpu/polaris10_mec_2.bin"); 138 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin"); 139 MODULE_FIRMWARE("amdgpu/polaris10_mec2_2.bin"); 140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin"); 141 142 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin"); 143 MODULE_FIRMWARE("amdgpu/polaris11_ce_2.bin"); 144 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin"); 145 MODULE_FIRMWARE("amdgpu/polaris11_pfp_2.bin"); 146 MODULE_FIRMWARE("amdgpu/polaris11_me.bin"); 147 MODULE_FIRMWARE("amdgpu/polaris11_me_2.bin"); 148 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin"); 149 MODULE_FIRMWARE("amdgpu/polaris11_mec_2.bin"); 150 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin"); 151 MODULE_FIRMWARE("amdgpu/polaris11_mec2_2.bin"); 152 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin"); 153 154 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin"); 155 MODULE_FIRMWARE("amdgpu/polaris12_ce_2.bin"); 156 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin"); 157 MODULE_FIRMWARE("amdgpu/polaris12_pfp_2.bin"); 158 MODULE_FIRMWARE("amdgpu/polaris12_me.bin"); 159 MODULE_FIRMWARE("amdgpu/polaris12_me_2.bin"); 160 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin"); 161 MODULE_FIRMWARE("amdgpu/polaris12_mec_2.bin"); 162 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin"); 163 MODULE_FIRMWARE("amdgpu/polaris12_mec2_2.bin"); 164 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin"); 165 166 MODULE_FIRMWARE("amdgpu/vegam_ce.bin"); 167 MODULE_FIRMWARE("amdgpu/vegam_pfp.bin"); 168 MODULE_FIRMWARE("amdgpu/vegam_me.bin"); 169 MODULE_FIRMWARE("amdgpu/vegam_mec.bin"); 170 MODULE_FIRMWARE("amdgpu/vegam_mec2.bin"); 171 MODULE_FIRMWARE("amdgpu/vegam_rlc.bin"); 172 173 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = 174 { 175 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, 176 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1}, 177 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2}, 178 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3}, 179 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4}, 180 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5}, 181 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6}, 182 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7}, 183 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8}, 184 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9}, 185 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10}, 186 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11}, 187 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12}, 188 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13}, 189 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14}, 190 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15} 191 }; 192 193 static const u32 golden_settings_tonga_a11[] = 194 { 195 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208, 196 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 197 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 198 mmGB_GPU_ID, 0x0000000f, 0x00000000, 199 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 200 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc, 201 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 202 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 203 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 204 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 205 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 206 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 207 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb, 208 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b, 209 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876, 210 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 211 }; 212 213 static const u32 tonga_golden_common_all[] = 214 { 215 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 216 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 217 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 218 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 219 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 220 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 221 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 222 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 223 }; 224 225 static const u32 tonga_mgcg_cgcg_init[] = 226 { 227 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 228 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 229 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 230 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 231 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 232 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 233 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 234 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 235 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 236 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 237 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 238 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 239 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 240 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 241 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 242 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 243 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 244 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 245 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 246 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 247 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 248 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 249 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 250 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 251 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 252 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 253 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 254 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 255 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 256 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 257 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 258 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 259 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 260 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 261 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 262 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 263 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 264 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 265 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 266 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 267 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 268 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 269 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 270 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 271 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 272 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 273 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 274 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 275 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 276 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 277 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 278 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 279 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 280 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 281 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 282 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 283 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 284 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 285 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 286 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 287 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 288 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 289 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 290 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 291 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 292 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 293 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 294 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 295 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 296 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 297 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 298 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 299 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 300 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 301 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 302 }; 303 304 static const u32 golden_settings_vegam_a11[] = 305 { 306 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, 307 mmCB_HW_CONTROL_2, 0x0f000000, 0x0d000000, 308 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 309 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 310 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 311 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 312 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x3a00161a, 313 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002e, 314 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 315 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 316 mmSQ_CONFIG, 0x07f80000, 0x01180000, 317 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 318 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 319 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 320 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 321 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x32761054, 322 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 323 }; 324 325 static const u32 vegam_golden_common_all[] = 326 { 327 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 328 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 329 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 330 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 331 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 332 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 333 }; 334 335 static const u32 golden_settings_polaris11_a11[] = 336 { 337 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208, 338 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 339 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 340 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 341 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 342 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 343 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 344 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 345 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 346 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 347 mmSQ_CONFIG, 0x07f80000, 0x01180000, 348 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 349 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 350 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3, 351 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 352 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210, 353 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 354 }; 355 356 static const u32 polaris11_golden_common_all[] = 357 { 358 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 359 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002, 360 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 361 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 362 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 363 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 364 }; 365 366 static const u32 golden_settings_polaris10_a11[] = 367 { 368 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200, 369 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208, 370 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000, 371 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 372 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 373 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 374 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 375 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012, 376 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a, 377 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 378 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c, 379 mmSQ_CONFIG, 0x07f80000, 0x07180000, 380 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 381 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 382 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7, 383 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 384 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 385 }; 386 387 static const u32 polaris10_golden_common_all[] = 388 { 389 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 390 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012, 391 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A, 392 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 393 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 394 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 395 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 396 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 397 }; 398 399 static const u32 fiji_golden_common_all[] = 400 { 401 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 402 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a, 403 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e, 404 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003, 405 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 406 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 407 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 408 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 409 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 410 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009, 411 }; 412 413 static const u32 golden_settings_fiji_a10[] = 414 { 415 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040, 416 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 417 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 418 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 419 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 420 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 421 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 422 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 423 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 424 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff, 425 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004, 426 }; 427 428 static const u32 fiji_mgcg_cgcg_init[] = 429 { 430 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 431 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 432 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 433 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 434 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 435 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 436 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100, 437 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 438 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 439 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 440 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 441 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 442 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 443 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 444 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 445 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 446 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 447 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 448 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 449 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 450 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 451 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 452 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 453 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 454 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 455 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 456 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 457 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 458 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 459 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 460 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 461 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 462 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 463 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 464 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 465 }; 466 467 static const u32 golden_settings_iceland_a11[] = 468 { 469 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 470 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 471 mmDB_DEBUG3, 0xc0000000, 0xc0000000, 472 mmGB_GPU_ID, 0x0000000f, 0x00000000, 473 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 474 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 475 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002, 476 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000, 477 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 478 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 479 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 480 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 481 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 482 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1, 483 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000, 484 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010, 485 }; 486 487 static const u32 iceland_golden_common_all[] = 488 { 489 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 490 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 491 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 492 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 493 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 494 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 495 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 496 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 497 }; 498 499 static const u32 iceland_mgcg_cgcg_init[] = 500 { 501 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 502 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 503 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 504 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 505 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100, 506 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100, 507 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100, 508 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 509 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 510 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 511 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 512 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 513 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 514 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 515 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 516 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 517 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 518 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 519 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 520 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 521 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 522 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 523 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100, 524 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 525 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 526 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 527 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 528 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 529 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 530 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 531 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 532 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 533 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 534 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 535 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 536 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 537 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 538 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 539 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 540 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 541 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 542 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 543 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 544 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 545 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 546 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 547 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 548 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 549 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 550 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 551 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 552 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 553 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 554 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87, 555 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 556 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 557 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 558 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 559 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 560 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 561 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 562 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 563 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 564 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c, 565 }; 566 567 static const u32 cz_golden_settings_a11[] = 568 { 569 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040, 570 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 571 mmGB_GPU_ID, 0x0000000f, 0x00000000, 572 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001, 573 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 574 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c, 575 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd, 576 mmTA_CNTL_AUX, 0x000f000f, 0x00010000, 577 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 578 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 579 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3, 580 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302 581 }; 582 583 static const u32 cz_golden_common_all[] = 584 { 585 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 586 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002, 587 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 588 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001, 589 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 590 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 591 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 592 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF 593 }; 594 595 static const u32 cz_mgcg_cgcg_init[] = 596 { 597 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, 598 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 599 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 600 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100, 601 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100, 602 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100, 603 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100, 604 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100, 605 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100, 606 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100, 607 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100, 608 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100, 609 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100, 610 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100, 611 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100, 612 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100, 613 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100, 614 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100, 615 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100, 616 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100, 617 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100, 618 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100, 619 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100, 620 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100, 621 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100, 622 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100, 623 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100, 624 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 625 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100, 626 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100, 627 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 628 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000, 629 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 630 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 631 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005, 632 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 633 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000, 634 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 635 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007, 636 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005, 637 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 638 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000, 639 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 640 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007, 641 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005, 642 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 643 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000, 644 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 645 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007, 646 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005, 647 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 648 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000, 649 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 650 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007, 651 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005, 652 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 653 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000, 654 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 655 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007, 656 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005, 657 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 658 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000, 659 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 660 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007, 661 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005, 662 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 663 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000, 664 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002, 665 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007, 666 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005, 667 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008, 668 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200, 669 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100, 670 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 671 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001, 672 }; 673 674 static const u32 stoney_golden_settings_a11[] = 675 { 676 mmDB_DEBUG2, 0xf00fffff, 0x00000400, 677 mmGB_GPU_ID, 0x0000000f, 0x00000000, 678 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001, 679 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000, 680 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c, 681 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000, 682 mmTCC_CTRL, 0x00100000, 0xf31fff7f, 683 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002, 684 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1, 685 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010, 686 }; 687 688 static const u32 stoney_golden_common_all[] = 689 { 690 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 691 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000, 692 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000, 693 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001, 694 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800, 695 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800, 696 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00FF7FBF, 697 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00FF7FAF, 698 }; 699 700 static const u32 stoney_mgcg_cgcg_init[] = 701 { 702 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000, 703 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f, 704 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 705 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201, 706 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200, 707 }; 708 709 710 static const char * const sq_edc_source_names[] = { 711 "SQ_EDC_INFO_SOURCE_INVALID: No EDC error has occurred", 712 "SQ_EDC_INFO_SOURCE_INST: EDC source is Instruction Fetch", 713 "SQ_EDC_INFO_SOURCE_SGPR: EDC source is SGPR or SQC data return", 714 "SQ_EDC_INFO_SOURCE_VGPR: EDC source is VGPR", 715 "SQ_EDC_INFO_SOURCE_LDS: EDC source is LDS", 716 "SQ_EDC_INFO_SOURCE_GDS: EDC source is GDS", 717 "SQ_EDC_INFO_SOURCE_TA: EDC source is TA", 718 }; 719 720 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); 721 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev); 722 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); 723 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); 724 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); 725 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); 726 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); 727 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); 728 729 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) 730 { 731 switch (adev->asic_type) { 732 case CHIP_TOPAZ: 733 amdgpu_device_program_register_sequence(adev, 734 iceland_mgcg_cgcg_init, 735 ARRAY_SIZE(iceland_mgcg_cgcg_init)); 736 amdgpu_device_program_register_sequence(adev, 737 golden_settings_iceland_a11, 738 ARRAY_SIZE(golden_settings_iceland_a11)); 739 amdgpu_device_program_register_sequence(adev, 740 iceland_golden_common_all, 741 ARRAY_SIZE(iceland_golden_common_all)); 742 break; 743 case CHIP_FIJI: 744 amdgpu_device_program_register_sequence(adev, 745 fiji_mgcg_cgcg_init, 746 ARRAY_SIZE(fiji_mgcg_cgcg_init)); 747 amdgpu_device_program_register_sequence(adev, 748 golden_settings_fiji_a10, 749 ARRAY_SIZE(golden_settings_fiji_a10)); 750 amdgpu_device_program_register_sequence(adev, 751 fiji_golden_common_all, 752 ARRAY_SIZE(fiji_golden_common_all)); 753 break; 754 755 case CHIP_TONGA: 756 amdgpu_device_program_register_sequence(adev, 757 tonga_mgcg_cgcg_init, 758 ARRAY_SIZE(tonga_mgcg_cgcg_init)); 759 amdgpu_device_program_register_sequence(adev, 760 golden_settings_tonga_a11, 761 ARRAY_SIZE(golden_settings_tonga_a11)); 762 amdgpu_device_program_register_sequence(adev, 763 tonga_golden_common_all, 764 ARRAY_SIZE(tonga_golden_common_all)); 765 break; 766 case CHIP_VEGAM: 767 amdgpu_device_program_register_sequence(adev, 768 golden_settings_vegam_a11, 769 ARRAY_SIZE(golden_settings_vegam_a11)); 770 amdgpu_device_program_register_sequence(adev, 771 vegam_golden_common_all, 772 ARRAY_SIZE(vegam_golden_common_all)); 773 break; 774 case CHIP_POLARIS11: 775 case CHIP_POLARIS12: 776 amdgpu_device_program_register_sequence(adev, 777 golden_settings_polaris11_a11, 778 ARRAY_SIZE(golden_settings_polaris11_a11)); 779 amdgpu_device_program_register_sequence(adev, 780 polaris11_golden_common_all, 781 ARRAY_SIZE(polaris11_golden_common_all)); 782 break; 783 case CHIP_POLARIS10: 784 amdgpu_device_program_register_sequence(adev, 785 golden_settings_polaris10_a11, 786 ARRAY_SIZE(golden_settings_polaris10_a11)); 787 amdgpu_device_program_register_sequence(adev, 788 polaris10_golden_common_all, 789 ARRAY_SIZE(polaris10_golden_common_all)); 790 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C); 791 if (adev->pdev->revision == 0xc7 && 792 ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) || 793 (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) || 794 (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) { 795 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD); 796 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0); 797 } 798 break; 799 case CHIP_CARRIZO: 800 amdgpu_device_program_register_sequence(adev, 801 cz_mgcg_cgcg_init, 802 ARRAY_SIZE(cz_mgcg_cgcg_init)); 803 amdgpu_device_program_register_sequence(adev, 804 cz_golden_settings_a11, 805 ARRAY_SIZE(cz_golden_settings_a11)); 806 amdgpu_device_program_register_sequence(adev, 807 cz_golden_common_all, 808 ARRAY_SIZE(cz_golden_common_all)); 809 break; 810 case CHIP_STONEY: 811 amdgpu_device_program_register_sequence(adev, 812 stoney_mgcg_cgcg_init, 813 ARRAY_SIZE(stoney_mgcg_cgcg_init)); 814 amdgpu_device_program_register_sequence(adev, 815 stoney_golden_settings_a11, 816 ARRAY_SIZE(stoney_golden_settings_a11)); 817 amdgpu_device_program_register_sequence(adev, 818 stoney_golden_common_all, 819 ARRAY_SIZE(stoney_golden_common_all)); 820 break; 821 default: 822 break; 823 } 824 } 825 826 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev) 827 { 828 adev->gfx.scratch.num_reg = 8; 829 adev->gfx.scratch.reg_base = mmSCRATCH_REG0; 830 adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; 831 } 832 833 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) 834 { 835 struct amdgpu_device *adev = ring->adev; 836 uint32_t scratch; 837 uint32_t tmp = 0; 838 unsigned i; 839 int r; 840 841 r = amdgpu_gfx_scratch_get(adev, &scratch); 842 if (r) { 843 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); 844 return r; 845 } 846 WREG32(scratch, 0xCAFEDEAD); 847 r = amdgpu_ring_alloc(ring, 3); 848 if (r) { 849 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", 850 ring->idx, r); 851 amdgpu_gfx_scratch_free(adev, scratch); 852 return r; 853 } 854 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 855 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 856 amdgpu_ring_write(ring, 0xDEADBEEF); 857 amdgpu_ring_commit(ring); 858 859 for (i = 0; i < adev->usec_timeout; i++) { 860 tmp = RREG32(scratch); 861 if (tmp == 0xDEADBEEF) 862 break; 863 DRM_UDELAY(1); 864 } 865 if (i < adev->usec_timeout) { 866 DRM_DEBUG("ring test on %d succeeded in %d usecs\n", 867 ring->idx, i); 868 } else { 869 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", 870 ring->idx, scratch, tmp); 871 r = -EINVAL; 872 } 873 amdgpu_gfx_scratch_free(adev, scratch); 874 return r; 875 } 876 877 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) 878 { 879 struct amdgpu_device *adev = ring->adev; 880 struct amdgpu_ib ib; 881 struct dma_fence *f = NULL; 882 883 unsigned int index; 884 uint64_t gpu_addr; 885 uint32_t tmp; 886 long r; 887 888 r = amdgpu_device_wb_get(adev, &index); 889 if (r) { 890 dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); 891 return r; 892 } 893 894 gpu_addr = adev->wb.gpu_addr + (index * 4); 895 adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); 896 memset(&ib, 0, sizeof(ib)); 897 r = amdgpu_ib_get(adev, NULL, 16, &ib); 898 if (r) { 899 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); 900 goto err1; 901 } 902 ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); 903 ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 904 ib.ptr[2] = lower_32_bits(gpu_addr); 905 ib.ptr[3] = upper_32_bits(gpu_addr); 906 ib.ptr[4] = 0xDEADBEEF; 907 ib.length_dw = 5; 908 909 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 910 if (r) 911 goto err2; 912 913 r = dma_fence_wait_timeout(f, false, timeout); 914 if (r == 0) { 915 DRM_ERROR("amdgpu: IB test timed out.\n"); 916 r = -ETIMEDOUT; 917 goto err2; 918 } else if (r < 0) { 919 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); 920 goto err2; 921 } 922 923 tmp = adev->wb.wb[index]; 924 if (tmp == 0xDEADBEEF) { 925 DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); 926 r = 0; 927 } else { 928 DRM_ERROR("ib test on ring %d failed\n", ring->idx); 929 r = -EINVAL; 930 } 931 932 err2: 933 amdgpu_ib_free(adev, &ib, NULL); 934 dma_fence_put(f); 935 err1: 936 amdgpu_device_wb_free(adev, index); 937 return r; 938 } 939 940 941 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) 942 { 943 release_firmware(adev->gfx.pfp_fw); 944 adev->gfx.pfp_fw = NULL; 945 release_firmware(adev->gfx.me_fw); 946 adev->gfx.me_fw = NULL; 947 release_firmware(adev->gfx.ce_fw); 948 adev->gfx.ce_fw = NULL; 949 release_firmware(adev->gfx.rlc_fw); 950 adev->gfx.rlc_fw = NULL; 951 release_firmware(adev->gfx.mec_fw); 952 adev->gfx.mec_fw = NULL; 953 if ((adev->asic_type != CHIP_STONEY) && 954 (adev->asic_type != CHIP_TOPAZ)) 955 release_firmware(adev->gfx.mec2_fw); 956 adev->gfx.mec2_fw = NULL; 957 958 kfree(adev->gfx.rlc.register_list_format); 959 } 960 961 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 962 { 963 const char *chip_name; 964 char fw_name[30]; 965 int err; 966 struct amdgpu_firmware_info *info = NULL; 967 const struct common_firmware_header *header = NULL; 968 const struct gfx_firmware_header_v1_0 *cp_hdr; 969 const struct rlc_firmware_header_v2_0 *rlc_hdr; 970 unsigned int *tmp = NULL, i; 971 972 DRM_DEBUG("\n"); 973 974 switch (adev->asic_type) { 975 case CHIP_TOPAZ: 976 chip_name = "topaz"; 977 break; 978 case CHIP_TONGA: 979 chip_name = "tonga"; 980 break; 981 case CHIP_CARRIZO: 982 chip_name = "carrizo"; 983 break; 984 case CHIP_FIJI: 985 chip_name = "fiji"; 986 break; 987 case CHIP_STONEY: 988 chip_name = "stoney"; 989 break; 990 case CHIP_POLARIS10: 991 chip_name = "polaris10"; 992 break; 993 case CHIP_POLARIS11: 994 chip_name = "polaris11"; 995 break; 996 case CHIP_POLARIS12: 997 chip_name = "polaris12"; 998 break; 999 case CHIP_VEGAM: 1000 chip_name = "vegam"; 1001 break; 1002 default: 1003 BUG(); 1004 } 1005 1006 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1007 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp_2.bin", chip_name); 1008 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1009 if (err == -ENOENT) { 1010 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1011 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1012 } 1013 } else { 1014 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); 1015 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); 1016 } 1017 if (err) 1018 goto out; 1019 err = amdgpu_ucode_validate(adev->gfx.pfp_fw); 1020 if (err) 1021 goto out; 1022 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1023 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1024 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1025 1026 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1027 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me_2.bin", chip_name); 1028 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1029 if (err == -ENOENT) { 1030 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1031 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1032 } 1033 } else { 1034 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); 1035 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); 1036 } 1037 if (err) 1038 goto out; 1039 err = amdgpu_ucode_validate(adev->gfx.me_fw); 1040 if (err) 1041 goto out; 1042 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1043 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1044 1045 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1046 1047 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1048 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce_2.bin", chip_name); 1049 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1050 if (err == -ENOENT) { 1051 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1052 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1053 } 1054 } else { 1055 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); 1056 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); 1057 } 1058 if (err) 1059 goto out; 1060 err = amdgpu_ucode_validate(adev->gfx.ce_fw); 1061 if (err) 1062 goto out; 1063 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1064 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1065 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1066 1067 /* 1068 * Support for MCBP/Virtualization in combination with chained IBs is 1069 * formal released on feature version #46 1070 */ 1071 if (adev->gfx.ce_feature_version >= 46 && 1072 adev->gfx.pfp_feature_version >= 46) { 1073 adev->virt.chained_ib_support = true; 1074 DRM_INFO("Chained IB support enabled!\n"); 1075 } else 1076 adev->virt.chained_ib_support = false; 1077 1078 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); 1079 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); 1080 if (err) 1081 goto out; 1082 err = amdgpu_ucode_validate(adev->gfx.rlc_fw); 1083 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 1084 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); 1085 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); 1086 1087 adev->gfx.rlc.save_and_restore_offset = 1088 le32_to_cpu(rlc_hdr->save_and_restore_offset); 1089 adev->gfx.rlc.clear_state_descriptor_offset = 1090 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); 1091 adev->gfx.rlc.avail_scratch_ram_locations = 1092 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); 1093 adev->gfx.rlc.reg_restore_list_size = 1094 le32_to_cpu(rlc_hdr->reg_restore_list_size); 1095 adev->gfx.rlc.reg_list_format_start = 1096 le32_to_cpu(rlc_hdr->reg_list_format_start); 1097 adev->gfx.rlc.reg_list_format_separate_start = 1098 le32_to_cpu(rlc_hdr->reg_list_format_separate_start); 1099 adev->gfx.rlc.starting_offsets_start = 1100 le32_to_cpu(rlc_hdr->starting_offsets_start); 1101 adev->gfx.rlc.reg_list_format_size_bytes = 1102 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); 1103 adev->gfx.rlc.reg_list_size_bytes = 1104 le32_to_cpu(rlc_hdr->reg_list_size_bytes); 1105 1106 adev->gfx.rlc.register_list_format = 1107 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + 1108 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); 1109 1110 if (!adev->gfx.rlc.register_list_format) { 1111 err = -ENOMEM; 1112 goto out; 1113 } 1114 1115 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1116 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); 1117 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) 1118 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); 1119 1120 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; 1121 1122 tmp = (unsigned int *)((uintptr_t)rlc_hdr + 1123 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); 1124 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) 1125 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); 1126 1127 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1128 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec_2.bin", chip_name); 1129 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1130 if (err == -ENOENT) { 1131 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1132 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1133 } 1134 } else { 1135 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); 1136 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); 1137 } 1138 if (err) 1139 goto out; 1140 err = amdgpu_ucode_validate(adev->gfx.mec_fw); 1141 if (err) 1142 goto out; 1143 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1144 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); 1145 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); 1146 1147 if ((adev->asic_type != CHIP_STONEY) && 1148 (adev->asic_type != CHIP_TOPAZ)) { 1149 if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { 1150 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2_2.bin", chip_name); 1151 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1152 if (err == -ENOENT) { 1153 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1154 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1155 } 1156 } else { 1157 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); 1158 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); 1159 } 1160 if (!err) { 1161 err = amdgpu_ucode_validate(adev->gfx.mec2_fw); 1162 if (err) 1163 goto out; 1164 cp_hdr = (const struct gfx_firmware_header_v1_0 *) 1165 adev->gfx.mec2_fw->data; 1166 adev->gfx.mec2_fw_version = 1167 le32_to_cpu(cp_hdr->header.ucode_version); 1168 adev->gfx.mec2_feature_version = 1169 le32_to_cpu(cp_hdr->ucode_feature_version); 1170 } else { 1171 err = 0; 1172 adev->gfx.mec2_fw = NULL; 1173 } 1174 } 1175 1176 if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { 1177 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; 1178 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; 1179 info->fw = adev->gfx.pfp_fw; 1180 header = (const struct common_firmware_header *)info->fw->data; 1181 adev->firmware.fw_size += 1182 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1183 1184 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; 1185 info->ucode_id = AMDGPU_UCODE_ID_CP_ME; 1186 info->fw = adev->gfx.me_fw; 1187 header = (const struct common_firmware_header *)info->fw->data; 1188 adev->firmware.fw_size += 1189 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1190 1191 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; 1192 info->ucode_id = AMDGPU_UCODE_ID_CP_CE; 1193 info->fw = adev->gfx.ce_fw; 1194 header = (const struct common_firmware_header *)info->fw->data; 1195 adev->firmware.fw_size += 1196 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1197 1198 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; 1199 info->ucode_id = AMDGPU_UCODE_ID_RLC_G; 1200 info->fw = adev->gfx.rlc_fw; 1201 header = (const struct common_firmware_header *)info->fw->data; 1202 adev->firmware.fw_size += 1203 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1204 1205 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; 1206 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; 1207 info->fw = adev->gfx.mec_fw; 1208 header = (const struct common_firmware_header *)info->fw->data; 1209 adev->firmware.fw_size += 1210 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1211 1212 /* we need account JT in */ 1213 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1214 adev->firmware.fw_size += 1215 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); 1216 1217 if (amdgpu_sriov_vf(adev)) { 1218 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE]; 1219 info->ucode_id = AMDGPU_UCODE_ID_STORAGE; 1220 info->fw = adev->gfx.mec_fw; 1221 adev->firmware.fw_size += 1222 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE); 1223 } 1224 1225 if (adev->gfx.mec2_fw) { 1226 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; 1227 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; 1228 info->fw = adev->gfx.mec2_fw; 1229 header = (const struct common_firmware_header *)info->fw->data; 1230 adev->firmware.fw_size += 1231 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); 1232 } 1233 1234 } 1235 1236 out: 1237 if (err) { 1238 dev_err(adev->dev, 1239 "gfx8: Failed to load firmware \"%s\"\n", 1240 fw_name); 1241 release_firmware(adev->gfx.pfp_fw); 1242 adev->gfx.pfp_fw = NULL; 1243 release_firmware(adev->gfx.me_fw); 1244 adev->gfx.me_fw = NULL; 1245 release_firmware(adev->gfx.ce_fw); 1246 adev->gfx.ce_fw = NULL; 1247 release_firmware(adev->gfx.rlc_fw); 1248 adev->gfx.rlc_fw = NULL; 1249 release_firmware(adev->gfx.mec_fw); 1250 adev->gfx.mec_fw = NULL; 1251 release_firmware(adev->gfx.mec2_fw); 1252 adev->gfx.mec2_fw = NULL; 1253 } 1254 return err; 1255 } 1256 1257 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, 1258 volatile u32 *buffer) 1259 { 1260 u32 count = 0, i; 1261 const struct cs_section_def *sect = NULL; 1262 const struct cs_extent_def *ext = NULL; 1263 1264 if (adev->gfx.rlc.cs_data == NULL) 1265 return; 1266 if (buffer == NULL) 1267 return; 1268 1269 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1270 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 1271 1272 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 1273 buffer[count++] = cpu_to_le32(0x80000000); 1274 buffer[count++] = cpu_to_le32(0x80000000); 1275 1276 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { 1277 for (ext = sect->section; ext->extent != NULL; ++ext) { 1278 if (sect->id == SECT_CONTEXT) { 1279 buffer[count++] = 1280 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); 1281 buffer[count++] = cpu_to_le32(ext->reg_index - 1282 PACKET3_SET_CONTEXT_REG_START); 1283 for (i = 0; i < ext->reg_count; i++) 1284 buffer[count++] = cpu_to_le32(ext->extent[i]); 1285 } else { 1286 return; 1287 } 1288 } 1289 } 1290 1291 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 1292 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG - 1293 PACKET3_SET_CONTEXT_REG_START); 1294 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config); 1295 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1); 1296 1297 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 1298 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); 1299 1300 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); 1301 buffer[count++] = cpu_to_le32(0); 1302 } 1303 1304 static void cz_init_cp_jump_table(struct amdgpu_device *adev) 1305 { 1306 const __le32 *fw_data; 1307 volatile u32 *dst_ptr; 1308 int me, i, max_me = 4; 1309 u32 bo_offset = 0; 1310 u32 table_offset, table_size; 1311 1312 if (adev->asic_type == CHIP_CARRIZO) 1313 max_me = 5; 1314 1315 /* write the cp table buffer */ 1316 dst_ptr = adev->gfx.rlc.cp_table_ptr; 1317 for (me = 0; me < max_me; me++) { 1318 if (me == 0) { 1319 const struct gfx_firmware_header_v1_0 *hdr = 1320 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; 1321 fw_data = (const __le32 *) 1322 (adev->gfx.ce_fw->data + 1323 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1324 table_offset = le32_to_cpu(hdr->jt_offset); 1325 table_size = le32_to_cpu(hdr->jt_size); 1326 } else if (me == 1) { 1327 const struct gfx_firmware_header_v1_0 *hdr = 1328 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; 1329 fw_data = (const __le32 *) 1330 (adev->gfx.pfp_fw->data + 1331 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1332 table_offset = le32_to_cpu(hdr->jt_offset); 1333 table_size = le32_to_cpu(hdr->jt_size); 1334 } else if (me == 2) { 1335 const struct gfx_firmware_header_v1_0 *hdr = 1336 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; 1337 fw_data = (const __le32 *) 1338 (adev->gfx.me_fw->data + 1339 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1340 table_offset = le32_to_cpu(hdr->jt_offset); 1341 table_size = le32_to_cpu(hdr->jt_size); 1342 } else if (me == 3) { 1343 const struct gfx_firmware_header_v1_0 *hdr = 1344 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 1345 fw_data = (const __le32 *) 1346 (adev->gfx.mec_fw->data + 1347 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1348 table_offset = le32_to_cpu(hdr->jt_offset); 1349 table_size = le32_to_cpu(hdr->jt_size); 1350 } else if (me == 4) { 1351 const struct gfx_firmware_header_v1_0 *hdr = 1352 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 1353 fw_data = (const __le32 *) 1354 (adev->gfx.mec2_fw->data + 1355 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 1356 table_offset = le32_to_cpu(hdr->jt_offset); 1357 table_size = le32_to_cpu(hdr->jt_size); 1358 } 1359 1360 for (i = 0; i < table_size; i ++) { 1361 dst_ptr[bo_offset + i] = 1362 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); 1363 } 1364 1365 bo_offset += table_size; 1366 } 1367 } 1368 1369 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) 1370 { 1371 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); 1372 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); 1373 } 1374 1375 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) 1376 { 1377 volatile u32 *dst_ptr; 1378 u32 dws; 1379 const struct cs_section_def *cs_data; 1380 int r; 1381 1382 adev->gfx.rlc.cs_data = vi_cs_data; 1383 1384 cs_data = adev->gfx.rlc.cs_data; 1385 1386 if (cs_data) { 1387 /* clear state block */ 1388 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); 1389 1390 r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, 1391 AMDGPU_GEM_DOMAIN_VRAM, 1392 &adev->gfx.rlc.clear_state_obj, 1393 &adev->gfx.rlc.clear_state_gpu_addr, 1394 (void **)&adev->gfx.rlc.cs_ptr); 1395 if (r) { 1396 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); 1397 gfx_v8_0_rlc_fini(adev); 1398 return r; 1399 } 1400 1401 /* set up the cs buffer */ 1402 dst_ptr = adev->gfx.rlc.cs_ptr; 1403 gfx_v8_0_get_csb_buffer(adev, dst_ptr); 1404 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); 1405 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); 1406 } 1407 1408 if ((adev->asic_type == CHIP_CARRIZO) || 1409 (adev->asic_type == CHIP_STONEY)) { 1410 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ 1411 r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, 1412 PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 1413 &adev->gfx.rlc.cp_table_obj, 1414 &adev->gfx.rlc.cp_table_gpu_addr, 1415 (void **)&adev->gfx.rlc.cp_table_ptr); 1416 if (r) { 1417 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); 1418 return r; 1419 } 1420 1421 cz_init_cp_jump_table(adev); 1422 1423 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); 1424 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); 1425 } 1426 1427 return 0; 1428 } 1429 1430 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) 1431 { 1432 amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL); 1433 } 1434 1435 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) 1436 { 1437 int r; 1438 u32 *hpd; 1439 size_t mec_hpd_size; 1440 1441 bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); 1442 1443 /* take ownership of the relevant compute queues */ 1444 amdgpu_gfx_compute_queue_acquire(adev); 1445 1446 mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; 1447 1448 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, 1449 AMDGPU_GEM_DOMAIN_GTT, 1450 &adev->gfx.mec.hpd_eop_obj, 1451 &adev->gfx.mec.hpd_eop_gpu_addr, 1452 (void **)&hpd); 1453 if (r) { 1454 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r); 1455 return r; 1456 } 1457 1458 memset(hpd, 0, mec_hpd_size); 1459 1460 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); 1461 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); 1462 1463 return 0; 1464 } 1465 1466 static const u32 vgpr_init_compute_shader[] = 1467 { 1468 0x7e000209, 0x7e020208, 1469 0x7e040207, 0x7e060206, 1470 0x7e080205, 0x7e0a0204, 1471 0x7e0c0203, 0x7e0e0202, 1472 0x7e100201, 0x7e120200, 1473 0x7e140209, 0x7e160208, 1474 0x7e180207, 0x7e1a0206, 1475 0x7e1c0205, 0x7e1e0204, 1476 0x7e200203, 0x7e220202, 1477 0x7e240201, 0x7e260200, 1478 0x7e280209, 0x7e2a0208, 1479 0x7e2c0207, 0x7e2e0206, 1480 0x7e300205, 0x7e320204, 1481 0x7e340203, 0x7e360202, 1482 0x7e380201, 0x7e3a0200, 1483 0x7e3c0209, 0x7e3e0208, 1484 0x7e400207, 0x7e420206, 1485 0x7e440205, 0x7e460204, 1486 0x7e480203, 0x7e4a0202, 1487 0x7e4c0201, 0x7e4e0200, 1488 0x7e500209, 0x7e520208, 1489 0x7e540207, 0x7e560206, 1490 0x7e580205, 0x7e5a0204, 1491 0x7e5c0203, 0x7e5e0202, 1492 0x7e600201, 0x7e620200, 1493 0x7e640209, 0x7e660208, 1494 0x7e680207, 0x7e6a0206, 1495 0x7e6c0205, 0x7e6e0204, 1496 0x7e700203, 0x7e720202, 1497 0x7e740201, 0x7e760200, 1498 0x7e780209, 0x7e7a0208, 1499 0x7e7c0207, 0x7e7e0206, 1500 0xbf8a0000, 0xbf810000, 1501 }; 1502 1503 static const u32 sgpr_init_compute_shader[] = 1504 { 1505 0xbe8a0100, 0xbe8c0102, 1506 0xbe8e0104, 0xbe900106, 1507 0xbe920108, 0xbe940100, 1508 0xbe960102, 0xbe980104, 1509 0xbe9a0106, 0xbe9c0108, 1510 0xbe9e0100, 0xbea00102, 1511 0xbea20104, 0xbea40106, 1512 0xbea60108, 0xbea80100, 1513 0xbeaa0102, 0xbeac0104, 1514 0xbeae0106, 0xbeb00108, 1515 0xbeb20100, 0xbeb40102, 1516 0xbeb60104, 0xbeb80106, 1517 0xbeba0108, 0xbebc0100, 1518 0xbebe0102, 0xbec00104, 1519 0xbec20106, 0xbec40108, 1520 0xbec60100, 0xbec80102, 1521 0xbee60004, 0xbee70005, 1522 0xbeea0006, 0xbeeb0007, 1523 0xbee80008, 0xbee90009, 1524 0xbefc0000, 0xbf8a0000, 1525 0xbf810000, 0x00000000, 1526 }; 1527 1528 static const u32 vgpr_init_regs[] = 1529 { 1530 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff, 1531 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */ 1532 mmCOMPUTE_NUM_THREAD_X, 256*4, 1533 mmCOMPUTE_NUM_THREAD_Y, 1, 1534 mmCOMPUTE_NUM_THREAD_Z, 1, 1535 mmCOMPUTE_PGM_RSRC1, 0x100004f, /* VGPRS=15 (64 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */ 1536 mmCOMPUTE_PGM_RSRC2, 20, 1537 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1538 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1539 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1540 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1541 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1542 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1543 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1544 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1545 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1546 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1547 }; 1548 1549 static const u32 sgpr1_init_regs[] = 1550 { 1551 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f, 1552 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, /* CU_GROUP_COUNT=1 */ 1553 mmCOMPUTE_NUM_THREAD_X, 256*5, 1554 mmCOMPUTE_NUM_THREAD_Y, 1, 1555 mmCOMPUTE_NUM_THREAD_Z, 1, 1556 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */ 1557 mmCOMPUTE_PGM_RSRC2, 20, 1558 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1559 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1560 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1561 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1562 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1563 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1564 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1565 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1566 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1567 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1568 }; 1569 1570 static const u32 sgpr2_init_regs[] = 1571 { 1572 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0, 1573 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000, 1574 mmCOMPUTE_NUM_THREAD_X, 256*5, 1575 mmCOMPUTE_NUM_THREAD_Y, 1, 1576 mmCOMPUTE_NUM_THREAD_Z, 1, 1577 mmCOMPUTE_PGM_RSRC1, 0x240, /* SGPRS=9 (80 GPRS) */ 1578 mmCOMPUTE_PGM_RSRC2, 20, 1579 mmCOMPUTE_USER_DATA_0, 0xedcedc00, 1580 mmCOMPUTE_USER_DATA_1, 0xedcedc01, 1581 mmCOMPUTE_USER_DATA_2, 0xedcedc02, 1582 mmCOMPUTE_USER_DATA_3, 0xedcedc03, 1583 mmCOMPUTE_USER_DATA_4, 0xedcedc04, 1584 mmCOMPUTE_USER_DATA_5, 0xedcedc05, 1585 mmCOMPUTE_USER_DATA_6, 0xedcedc06, 1586 mmCOMPUTE_USER_DATA_7, 0xedcedc07, 1587 mmCOMPUTE_USER_DATA_8, 0xedcedc08, 1588 mmCOMPUTE_USER_DATA_9, 0xedcedc09, 1589 }; 1590 1591 static const u32 sec_ded_counter_registers[] = 1592 { 1593 mmCPC_EDC_ATC_CNT, 1594 mmCPC_EDC_SCRATCH_CNT, 1595 mmCPC_EDC_UCODE_CNT, 1596 mmCPF_EDC_ATC_CNT, 1597 mmCPF_EDC_ROQ_CNT, 1598 mmCPF_EDC_TAG_CNT, 1599 mmCPG_EDC_ATC_CNT, 1600 mmCPG_EDC_DMA_CNT, 1601 mmCPG_EDC_TAG_CNT, 1602 mmDC_EDC_CSINVOC_CNT, 1603 mmDC_EDC_RESTORE_CNT, 1604 mmDC_EDC_STATE_CNT, 1605 mmGDS_EDC_CNT, 1606 mmGDS_EDC_GRBM_CNT, 1607 mmGDS_EDC_OA_DED, 1608 mmSPI_EDC_CNT, 1609 mmSQC_ATC_EDC_GATCL1_CNT, 1610 mmSQC_EDC_CNT, 1611 mmSQ_EDC_DED_CNT, 1612 mmSQ_EDC_INFO, 1613 mmSQ_EDC_SEC_CNT, 1614 mmTCC_EDC_CNT, 1615 mmTCP_ATC_EDC_GATCL1_CNT, 1616 mmTCP_EDC_CNT, 1617 mmTD_EDC_CNT 1618 }; 1619 1620 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) 1621 { 1622 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0]; 1623 struct amdgpu_ib ib; 1624 struct dma_fence *f = NULL; 1625 int r, i; 1626 u32 tmp; 1627 unsigned total_size, vgpr_offset, sgpr_offset; 1628 u64 gpu_addr; 1629 1630 /* only supported on CZ */ 1631 if (adev->asic_type != CHIP_CARRIZO) 1632 return 0; 1633 1634 /* bail if the compute ring is not ready */ 1635 if (!ring->ready) 1636 return 0; 1637 1638 tmp = RREG32(mmGB_EDC_MODE); 1639 WREG32(mmGB_EDC_MODE, 0); 1640 1641 total_size = 1642 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1643 total_size += 1644 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1645 total_size += 1646 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4; 1647 total_size = ALIGN(total_size, 256); 1648 vgpr_offset = total_size; 1649 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); 1650 sgpr_offset = total_size; 1651 total_size += sizeof(sgpr_init_compute_shader); 1652 1653 /* allocate an indirect buffer to put the commands in */ 1654 memset(&ib, 0, sizeof(ib)); 1655 r = amdgpu_ib_get(adev, NULL, total_size, &ib); 1656 if (r) { 1657 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); 1658 return r; 1659 } 1660 1661 /* load the compute shaders */ 1662 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++) 1663 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i]; 1664 1665 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++) 1666 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i]; 1667 1668 /* init the ib length to 0 */ 1669 ib.length_dw = 0; 1670 1671 /* VGPR */ 1672 /* write the register state for the compute dispatch */ 1673 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) { 1674 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1675 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START; 1676 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1]; 1677 } 1678 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1679 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8; 1680 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1681 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1682 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1683 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1684 1685 /* write dispatch packet */ 1686 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1687 ib.ptr[ib.length_dw++] = 8; /* x */ 1688 ib.ptr[ib.length_dw++] = 1; /* y */ 1689 ib.ptr[ib.length_dw++] = 1; /* z */ 1690 ib.ptr[ib.length_dw++] = 1691 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1692 1693 /* write CS partial flush packet */ 1694 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1695 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1696 1697 /* SGPR1 */ 1698 /* write the register state for the compute dispatch */ 1699 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) { 1700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1701 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START; 1702 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1]; 1703 } 1704 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1705 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1707 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1708 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1709 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1710 1711 /* write dispatch packet */ 1712 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1713 ib.ptr[ib.length_dw++] = 8; /* x */ 1714 ib.ptr[ib.length_dw++] = 1; /* y */ 1715 ib.ptr[ib.length_dw++] = 1; /* z */ 1716 ib.ptr[ib.length_dw++] = 1717 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1718 1719 /* write CS partial flush packet */ 1720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1721 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1722 1723 /* SGPR2 */ 1724 /* write the register state for the compute dispatch */ 1725 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) { 1726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); 1727 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START; 1728 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1]; 1729 } 1730 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ 1731 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8; 1732 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); 1733 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 1734 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr); 1735 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr); 1736 1737 /* write dispatch packet */ 1738 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 1739 ib.ptr[ib.length_dw++] = 8; /* x */ 1740 ib.ptr[ib.length_dw++] = 1; /* y */ 1741 ib.ptr[ib.length_dw++] = 1; /* z */ 1742 ib.ptr[ib.length_dw++] = 1743 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); 1744 1745 /* write CS partial flush packet */ 1746 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0); 1747 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); 1748 1749 /* shedule the ib on the ring */ 1750 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); 1751 if (r) { 1752 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); 1753 goto fail; 1754 } 1755 1756 /* wait for the GPU to finish processing the IB */ 1757 r = dma_fence_wait(f, false); 1758 if (r) { 1759 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); 1760 goto fail; 1761 } 1762 1763 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2); 1764 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1); 1765 WREG32(mmGB_EDC_MODE, tmp); 1766 1767 tmp = RREG32(mmCC_GC_EDC_CONFIG); 1768 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1; 1769 WREG32(mmCC_GC_EDC_CONFIG, tmp); 1770 1771 1772 /* read back registers to clear the counters */ 1773 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) 1774 RREG32(sec_ded_counter_registers[i]); 1775 1776 fail: 1777 amdgpu_ib_free(adev, &ib, NULL); 1778 dma_fence_put(f); 1779 1780 return r; 1781 } 1782 1783 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) 1784 { 1785 u32 gb_addr_config; 1786 u32 mc_shared_chmap, mc_arb_ramcfg; 1787 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; 1788 u32 tmp; 1789 int ret; 1790 1791 switch (adev->asic_type) { 1792 case CHIP_TOPAZ: 1793 adev->gfx.config.max_shader_engines = 1; 1794 adev->gfx.config.max_tile_pipes = 2; 1795 adev->gfx.config.max_cu_per_sh = 6; 1796 adev->gfx.config.max_sh_per_se = 1; 1797 adev->gfx.config.max_backends_per_se = 2; 1798 adev->gfx.config.max_texture_channel_caches = 2; 1799 adev->gfx.config.max_gprs = 256; 1800 adev->gfx.config.max_gs_threads = 32; 1801 adev->gfx.config.max_hw_contexts = 8; 1802 1803 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1804 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1805 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1806 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1807 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN; 1808 break; 1809 case CHIP_FIJI: 1810 adev->gfx.config.max_shader_engines = 4; 1811 adev->gfx.config.max_tile_pipes = 16; 1812 adev->gfx.config.max_cu_per_sh = 16; 1813 adev->gfx.config.max_sh_per_se = 1; 1814 adev->gfx.config.max_backends_per_se = 4; 1815 adev->gfx.config.max_texture_channel_caches = 16; 1816 adev->gfx.config.max_gprs = 256; 1817 adev->gfx.config.max_gs_threads = 32; 1818 adev->gfx.config.max_hw_contexts = 8; 1819 1820 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1821 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1822 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1823 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1824 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1825 break; 1826 case CHIP_POLARIS11: 1827 case CHIP_POLARIS12: 1828 ret = amdgpu_atombios_get_gfx_info(adev); 1829 if (ret) 1830 return ret; 1831 adev->gfx.config.max_gprs = 256; 1832 adev->gfx.config.max_gs_threads = 32; 1833 adev->gfx.config.max_hw_contexts = 8; 1834 1835 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1836 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1837 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1838 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1839 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN; 1840 break; 1841 case CHIP_POLARIS10: 1842 case CHIP_VEGAM: 1843 ret = amdgpu_atombios_get_gfx_info(adev); 1844 if (ret) 1845 return ret; 1846 adev->gfx.config.max_gprs = 256; 1847 adev->gfx.config.max_gs_threads = 32; 1848 adev->gfx.config.max_hw_contexts = 8; 1849 1850 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1851 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1852 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1853 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1854 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1855 break; 1856 case CHIP_TONGA: 1857 adev->gfx.config.max_shader_engines = 4; 1858 adev->gfx.config.max_tile_pipes = 8; 1859 adev->gfx.config.max_cu_per_sh = 8; 1860 adev->gfx.config.max_sh_per_se = 1; 1861 adev->gfx.config.max_backends_per_se = 2; 1862 adev->gfx.config.max_texture_channel_caches = 8; 1863 adev->gfx.config.max_gprs = 256; 1864 adev->gfx.config.max_gs_threads = 32; 1865 adev->gfx.config.max_hw_contexts = 8; 1866 1867 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1868 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1869 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1870 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1871 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1872 break; 1873 case CHIP_CARRIZO: 1874 adev->gfx.config.max_shader_engines = 1; 1875 adev->gfx.config.max_tile_pipes = 2; 1876 adev->gfx.config.max_sh_per_se = 1; 1877 adev->gfx.config.max_backends_per_se = 2; 1878 adev->gfx.config.max_cu_per_sh = 8; 1879 adev->gfx.config.max_texture_channel_caches = 2; 1880 adev->gfx.config.max_gprs = 256; 1881 adev->gfx.config.max_gs_threads = 32; 1882 adev->gfx.config.max_hw_contexts = 8; 1883 1884 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1885 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1886 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1887 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1888 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1889 break; 1890 case CHIP_STONEY: 1891 adev->gfx.config.max_shader_engines = 1; 1892 adev->gfx.config.max_tile_pipes = 2; 1893 adev->gfx.config.max_sh_per_se = 1; 1894 adev->gfx.config.max_backends_per_se = 1; 1895 adev->gfx.config.max_cu_per_sh = 3; 1896 adev->gfx.config.max_texture_channel_caches = 2; 1897 adev->gfx.config.max_gprs = 256; 1898 adev->gfx.config.max_gs_threads = 16; 1899 adev->gfx.config.max_hw_contexts = 8; 1900 1901 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1902 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1903 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1904 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1905 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN; 1906 break; 1907 default: 1908 adev->gfx.config.max_shader_engines = 2; 1909 adev->gfx.config.max_tile_pipes = 4; 1910 adev->gfx.config.max_cu_per_sh = 2; 1911 adev->gfx.config.max_sh_per_se = 1; 1912 adev->gfx.config.max_backends_per_se = 2; 1913 adev->gfx.config.max_texture_channel_caches = 4; 1914 adev->gfx.config.max_gprs = 256; 1915 adev->gfx.config.max_gs_threads = 32; 1916 adev->gfx.config.max_hw_contexts = 8; 1917 1918 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; 1919 adev->gfx.config.sc_prim_fifo_size_backend = 0x100; 1920 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; 1921 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; 1922 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN; 1923 break; 1924 } 1925 1926 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); 1927 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); 1928 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; 1929 1930 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; 1931 adev->gfx.config.mem_max_burst_length_bytes = 256; 1932 if (adev->flags & AMD_IS_APU) { 1933 /* Get memory bank mapping mode. */ 1934 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); 1935 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1936 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1937 1938 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); 1939 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); 1940 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); 1941 1942 /* Validate settings in case only one DIMM installed. */ 1943 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) 1944 dimm00_addr_map = 0; 1945 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) 1946 dimm01_addr_map = 0; 1947 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) 1948 dimm10_addr_map = 0; 1949 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) 1950 dimm11_addr_map = 0; 1951 1952 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ 1953 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */ 1954 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) 1955 adev->gfx.config.mem_row_size_in_kb = 2; 1956 else 1957 adev->gfx.config.mem_row_size_in_kb = 1; 1958 } else { 1959 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS); 1960 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; 1961 if (adev->gfx.config.mem_row_size_in_kb > 4) 1962 adev->gfx.config.mem_row_size_in_kb = 4; 1963 } 1964 1965 adev->gfx.config.shader_engine_tile_size = 32; 1966 adev->gfx.config.num_gpus = 1; 1967 adev->gfx.config.multi_gpu_tile_size = 64; 1968 1969 /* fix up row size */ 1970 switch (adev->gfx.config.mem_row_size_in_kb) { 1971 case 1: 1972 default: 1973 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0); 1974 break; 1975 case 2: 1976 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1); 1977 break; 1978 case 4: 1979 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2); 1980 break; 1981 } 1982 adev->gfx.config.gb_addr_config = gb_addr_config; 1983 1984 return 0; 1985 } 1986 1987 static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, 1988 int mec, int pipe, int queue) 1989 { 1990 int r; 1991 unsigned irq_type; 1992 struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; 1993 1994 ring = &adev->gfx.compute_ring[ring_id]; 1995 1996 /* mec0 is me1 */ 1997 ring->me = mec + 1; 1998 ring->pipe = pipe; 1999 ring->queue = queue; 2000 2001 ring->ring_obj = NULL; 2002 ring->use_doorbell = true; 2003 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; 2004 ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr 2005 + (ring_id * GFX8_MEC_HPD_SIZE); 2006 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); 2007 2008 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP 2009 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) 2010 + ring->pipe; 2011 2012 /* type-2 packets are deprecated on MEC, use type-3 instead */ 2013 r = amdgpu_ring_init(adev, ring, 1024, 2014 &adev->gfx.eop_irq, irq_type); 2015 if (r) 2016 return r; 2017 2018 2019 return 0; 2020 } 2021 2022 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work); 2023 2024 static int gfx_v8_0_sw_init(void *handle) 2025 { 2026 int i, j, k, r, ring_id; 2027 struct amdgpu_ring *ring; 2028 struct amdgpu_kiq *kiq; 2029 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2030 2031 switch (adev->asic_type) { 2032 case CHIP_TONGA: 2033 case CHIP_CARRIZO: 2034 case CHIP_FIJI: 2035 case CHIP_POLARIS10: 2036 case CHIP_POLARIS11: 2037 case CHIP_POLARIS12: 2038 case CHIP_VEGAM: 2039 adev->gfx.mec.num_mec = 2; 2040 break; 2041 case CHIP_TOPAZ: 2042 case CHIP_STONEY: 2043 default: 2044 adev->gfx.mec.num_mec = 1; 2045 break; 2046 } 2047 2048 adev->gfx.mec.num_pipe_per_mec = 4; 2049 adev->gfx.mec.num_queue_per_pipe = 8; 2050 2051 /* KIQ event */ 2052 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq); 2053 if (r) 2054 return r; 2055 2056 /* EOP Event */ 2057 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); 2058 if (r) 2059 return r; 2060 2061 /* Privileged reg */ 2062 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT, 2063 &adev->gfx.priv_reg_irq); 2064 if (r) 2065 return r; 2066 2067 /* Privileged inst */ 2068 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT, 2069 &adev->gfx.priv_inst_irq); 2070 if (r) 2071 return r; 2072 2073 /* Add CP EDC/ECC irq */ 2074 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, 2075 &adev->gfx.cp_ecc_error_irq); 2076 if (r) 2077 return r; 2078 2079 /* SQ interrupts. */ 2080 r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, 2081 &adev->gfx.sq_irq); 2082 if (r) { 2083 DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r); 2084 return r; 2085 } 2086 2087 INIT_WORK(&adev->gfx.sq_work.work, gfx_v8_0_sq_irq_work_func); 2088 2089 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; 2090 2091 gfx_v8_0_scratch_init(adev); 2092 2093 r = gfx_v8_0_init_microcode(adev); 2094 if (r) { 2095 DRM_ERROR("Failed to load gfx firmware!\n"); 2096 return r; 2097 } 2098 2099 r = gfx_v8_0_rlc_init(adev); 2100 if (r) { 2101 DRM_ERROR("Failed to init rlc BOs!\n"); 2102 return r; 2103 } 2104 2105 r = gfx_v8_0_mec_init(adev); 2106 if (r) { 2107 DRM_ERROR("Failed to init MEC BOs!\n"); 2108 return r; 2109 } 2110 2111 /* set up the gfx ring */ 2112 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2113 ring = &adev->gfx.gfx_ring[i]; 2114 ring->ring_obj = NULL; 2115 sprintf(ring->name, "gfx"); 2116 /* no gfx doorbells on iceland */ 2117 if (adev->asic_type != CHIP_TOPAZ) { 2118 ring->use_doorbell = true; 2119 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; 2120 } 2121 2122 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, 2123 AMDGPU_CP_IRQ_GFX_EOP); 2124 if (r) 2125 return r; 2126 } 2127 2128 2129 /* set up the compute queues - allocate horizontally across pipes */ 2130 ring_id = 0; 2131 for (i = 0; i < adev->gfx.mec.num_mec; ++i) { 2132 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { 2133 for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { 2134 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) 2135 continue; 2136 2137 r = gfx_v8_0_compute_ring_init(adev, 2138 ring_id, 2139 i, k, j); 2140 if (r) 2141 return r; 2142 2143 ring_id++; 2144 } 2145 } 2146 } 2147 2148 r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); 2149 if (r) { 2150 DRM_ERROR("Failed to init KIQ BOs!\n"); 2151 return r; 2152 } 2153 2154 kiq = &adev->gfx.kiq; 2155 r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); 2156 if (r) 2157 return r; 2158 2159 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2160 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); 2161 if (r) 2162 return r; 2163 2164 /* reserve GDS, GWS and OA resource for gfx */ 2165 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, 2166 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, 2167 &adev->gds.gds_gfx_bo, NULL, NULL); 2168 if (r) 2169 return r; 2170 2171 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, 2172 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, 2173 &adev->gds.gws_gfx_bo, NULL, NULL); 2174 if (r) 2175 return r; 2176 2177 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, 2178 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, 2179 &adev->gds.oa_gfx_bo, NULL, NULL); 2180 if (r) 2181 return r; 2182 2183 adev->gfx.ce_ram_size = 0x8000; 2184 2185 r = gfx_v8_0_gpu_early_init(adev); 2186 if (r) 2187 return r; 2188 2189 return 0; 2190 } 2191 2192 static int gfx_v8_0_sw_fini(void *handle) 2193 { 2194 int i; 2195 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2196 2197 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); 2198 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); 2199 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); 2200 2201 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 2202 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2203 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2204 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2205 2206 amdgpu_gfx_compute_mqd_sw_fini(adev); 2207 amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); 2208 amdgpu_gfx_kiq_fini(adev); 2209 2210 gfx_v8_0_mec_fini(adev); 2211 gfx_v8_0_rlc_fini(adev); 2212 amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, 2213 &adev->gfx.rlc.clear_state_gpu_addr, 2214 (void **)&adev->gfx.rlc.cs_ptr); 2215 if ((adev->asic_type == CHIP_CARRIZO) || 2216 (adev->asic_type == CHIP_STONEY)) { 2217 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, 2218 &adev->gfx.rlc.cp_table_gpu_addr, 2219 (void **)&adev->gfx.rlc.cp_table_ptr); 2220 } 2221 gfx_v8_0_free_microcode(adev); 2222 2223 return 0; 2224 } 2225 2226 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) 2227 { 2228 uint32_t *modearray, *mod2array; 2229 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); 2230 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); 2231 u32 reg_offset; 2232 2233 modearray = adev->gfx.config.tile_mode_array; 2234 mod2array = adev->gfx.config.macrotile_mode_array; 2235 2236 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2237 modearray[reg_offset] = 0; 2238 2239 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2240 mod2array[reg_offset] = 0; 2241 2242 switch (adev->asic_type) { 2243 case CHIP_TOPAZ: 2244 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2245 PIPE_CONFIG(ADDR_SURF_P2) | 2246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2248 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2249 PIPE_CONFIG(ADDR_SURF_P2) | 2250 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2252 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2253 PIPE_CONFIG(ADDR_SURF_P2) | 2254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2256 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2257 PIPE_CONFIG(ADDR_SURF_P2) | 2258 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2260 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2261 PIPE_CONFIG(ADDR_SURF_P2) | 2262 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2263 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2264 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2265 PIPE_CONFIG(ADDR_SURF_P2) | 2266 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2267 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2268 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2269 PIPE_CONFIG(ADDR_SURF_P2) | 2270 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2271 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2272 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2273 PIPE_CONFIG(ADDR_SURF_P2)); 2274 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2275 PIPE_CONFIG(ADDR_SURF_P2) | 2276 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2278 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2279 PIPE_CONFIG(ADDR_SURF_P2) | 2280 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2282 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2283 PIPE_CONFIG(ADDR_SURF_P2) | 2284 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2286 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2287 PIPE_CONFIG(ADDR_SURF_P2) | 2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2290 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2291 PIPE_CONFIG(ADDR_SURF_P2) | 2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2294 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2295 PIPE_CONFIG(ADDR_SURF_P2) | 2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2298 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2299 PIPE_CONFIG(ADDR_SURF_P2) | 2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2302 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2303 PIPE_CONFIG(ADDR_SURF_P2) | 2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2306 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2307 PIPE_CONFIG(ADDR_SURF_P2) | 2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2310 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2311 PIPE_CONFIG(ADDR_SURF_P2) | 2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2314 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2315 PIPE_CONFIG(ADDR_SURF_P2) | 2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2318 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2319 PIPE_CONFIG(ADDR_SURF_P2) | 2320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2322 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2323 PIPE_CONFIG(ADDR_SURF_P2) | 2324 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2326 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2327 PIPE_CONFIG(ADDR_SURF_P2) | 2328 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2330 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2331 PIPE_CONFIG(ADDR_SURF_P2) | 2332 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2334 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2335 PIPE_CONFIG(ADDR_SURF_P2) | 2336 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2338 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2339 PIPE_CONFIG(ADDR_SURF_P2) | 2340 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2342 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2343 PIPE_CONFIG(ADDR_SURF_P2) | 2344 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2346 2347 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2350 NUM_BANKS(ADDR_SURF_8_BANK)); 2351 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2354 NUM_BANKS(ADDR_SURF_8_BANK)); 2355 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2358 NUM_BANKS(ADDR_SURF_8_BANK)); 2359 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2362 NUM_BANKS(ADDR_SURF_8_BANK)); 2363 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2366 NUM_BANKS(ADDR_SURF_8_BANK)); 2367 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2370 NUM_BANKS(ADDR_SURF_8_BANK)); 2371 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2374 NUM_BANKS(ADDR_SURF_8_BANK)); 2375 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2378 NUM_BANKS(ADDR_SURF_16_BANK)); 2379 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2382 NUM_BANKS(ADDR_SURF_16_BANK)); 2383 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2386 NUM_BANKS(ADDR_SURF_16_BANK)); 2387 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2390 NUM_BANKS(ADDR_SURF_16_BANK)); 2391 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2394 NUM_BANKS(ADDR_SURF_16_BANK)); 2395 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2398 NUM_BANKS(ADDR_SURF_16_BANK)); 2399 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2402 NUM_BANKS(ADDR_SURF_8_BANK)); 2403 2404 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2405 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 2406 reg_offset != 23) 2407 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2408 2409 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2410 if (reg_offset != 7) 2411 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2412 2413 break; 2414 case CHIP_FIJI: 2415 case CHIP_VEGAM: 2416 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2420 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2424 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2425 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2426 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2427 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2428 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2430 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2431 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2432 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2434 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2435 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2436 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2437 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2438 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2439 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2440 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2441 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2442 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2443 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2444 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2445 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2446 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2447 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2448 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2449 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); 2450 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2451 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2452 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2454 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2455 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2456 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2458 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2459 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2460 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2462 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2463 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2464 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2466 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2467 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2468 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2470 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2471 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2472 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2474 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2475 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2476 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2478 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2479 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2480 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2481 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2482 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2483 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2484 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2486 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2487 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2488 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2490 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2491 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2494 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2495 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2496 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2498 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2499 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2500 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2502 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2503 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2504 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2506 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2507 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2508 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2509 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2510 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2511 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2512 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2513 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2514 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2515 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2516 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2517 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2518 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2519 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2520 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2521 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2522 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2523 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2524 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2525 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2526 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2527 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2528 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2529 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2530 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2531 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | 2532 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2533 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2534 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2535 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2536 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2537 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2538 2539 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2542 NUM_BANKS(ADDR_SURF_8_BANK)); 2543 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2546 NUM_BANKS(ADDR_SURF_8_BANK)); 2547 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2550 NUM_BANKS(ADDR_SURF_8_BANK)); 2551 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2554 NUM_BANKS(ADDR_SURF_8_BANK)); 2555 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2558 NUM_BANKS(ADDR_SURF_8_BANK)); 2559 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2562 NUM_BANKS(ADDR_SURF_8_BANK)); 2563 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2564 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2565 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2566 NUM_BANKS(ADDR_SURF_8_BANK)); 2567 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2568 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2569 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2570 NUM_BANKS(ADDR_SURF_8_BANK)); 2571 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2572 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2573 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2574 NUM_BANKS(ADDR_SURF_8_BANK)); 2575 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2576 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2577 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2578 NUM_BANKS(ADDR_SURF_8_BANK)); 2579 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2580 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2581 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2582 NUM_BANKS(ADDR_SURF_8_BANK)); 2583 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2584 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2585 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2586 NUM_BANKS(ADDR_SURF_8_BANK)); 2587 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2588 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2589 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2590 NUM_BANKS(ADDR_SURF_8_BANK)); 2591 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2592 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2593 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2594 NUM_BANKS(ADDR_SURF_4_BANK)); 2595 2596 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2597 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2598 2599 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2600 if (reg_offset != 7) 2601 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2602 2603 break; 2604 case CHIP_TONGA: 2605 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2606 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2607 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2608 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2609 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2611 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2612 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2613 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2614 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2615 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2616 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2617 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2618 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2619 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2620 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2621 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2623 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2624 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2625 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2626 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2627 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2628 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2629 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2630 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2631 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2632 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2633 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2634 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2635 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2636 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2637 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2638 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 2639 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2640 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2641 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2642 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2643 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2644 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2645 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2646 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2647 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2648 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2649 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2650 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2651 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2652 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2653 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2654 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2655 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2656 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2657 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2658 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2659 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2660 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2661 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2662 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2663 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2664 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2665 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2666 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2667 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2668 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2669 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2670 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2671 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2672 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2673 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2674 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2675 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2676 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2677 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2678 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2679 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2680 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2681 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2682 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2683 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2684 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2685 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2686 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2687 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2688 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2689 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2690 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2691 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2692 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2693 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2694 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2695 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2696 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2697 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2698 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2699 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2700 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2701 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2702 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2703 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2704 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2705 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2706 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2707 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2708 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2709 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2710 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2711 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2712 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2713 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2714 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2715 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2716 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2717 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2718 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2719 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2720 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2721 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2722 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2723 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2724 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2725 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2726 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2727 2728 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2729 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2730 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2731 NUM_BANKS(ADDR_SURF_16_BANK)); 2732 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2733 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2734 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2735 NUM_BANKS(ADDR_SURF_16_BANK)); 2736 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2737 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2738 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2739 NUM_BANKS(ADDR_SURF_16_BANK)); 2740 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2741 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2742 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2743 NUM_BANKS(ADDR_SURF_16_BANK)); 2744 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2745 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2746 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2747 NUM_BANKS(ADDR_SURF_16_BANK)); 2748 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2749 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2750 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2751 NUM_BANKS(ADDR_SURF_16_BANK)); 2752 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2753 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2754 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2755 NUM_BANKS(ADDR_SURF_16_BANK)); 2756 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2757 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2758 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2759 NUM_BANKS(ADDR_SURF_16_BANK)); 2760 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2761 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2762 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2763 NUM_BANKS(ADDR_SURF_16_BANK)); 2764 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2765 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2766 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2767 NUM_BANKS(ADDR_SURF_16_BANK)); 2768 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2769 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2770 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2771 NUM_BANKS(ADDR_SURF_16_BANK)); 2772 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2773 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2774 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2775 NUM_BANKS(ADDR_SURF_8_BANK)); 2776 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2777 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2778 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2779 NUM_BANKS(ADDR_SURF_4_BANK)); 2780 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2781 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2782 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2783 NUM_BANKS(ADDR_SURF_4_BANK)); 2784 2785 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2786 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2787 2788 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2789 if (reg_offset != 7) 2790 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2791 2792 break; 2793 case CHIP_POLARIS11: 2794 case CHIP_POLARIS12: 2795 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2796 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2797 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 2798 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2799 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2800 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2801 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 2802 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2803 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2804 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2805 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 2806 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2807 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2808 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2809 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 2810 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2811 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2812 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2813 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2814 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2815 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2816 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2817 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2818 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2819 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2820 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2821 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2822 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2823 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2824 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2825 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 2826 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 2827 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 2828 PIPE_CONFIG(ADDR_SURF_P4_16x16)); 2829 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2830 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2831 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2832 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2833 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2834 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2835 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2836 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2837 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2838 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2839 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2840 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2841 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2842 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2843 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 2844 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2845 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2846 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2847 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2848 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2849 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2850 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2851 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2852 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2853 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 2854 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2855 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2856 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2857 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2858 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2859 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2860 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2861 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2862 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2863 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2864 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2865 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2866 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2867 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2868 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2869 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 2870 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2871 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2872 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2873 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2874 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2875 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2876 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2877 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 2878 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2879 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2880 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2881 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2882 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2883 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2884 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2885 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 2886 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2887 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2888 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2889 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 2890 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2891 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 2892 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2893 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 2894 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2895 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2896 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2897 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 2898 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2899 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 2900 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 2901 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 2902 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2903 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2904 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2905 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2906 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2907 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2908 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 2909 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2910 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2911 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2912 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2913 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 2914 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 2915 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 2916 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 2917 2918 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2919 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2920 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2921 NUM_BANKS(ADDR_SURF_16_BANK)); 2922 2923 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2924 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2925 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2926 NUM_BANKS(ADDR_SURF_16_BANK)); 2927 2928 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2929 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2930 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2931 NUM_BANKS(ADDR_SURF_16_BANK)); 2932 2933 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2934 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2935 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2936 NUM_BANKS(ADDR_SURF_16_BANK)); 2937 2938 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2939 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2940 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2941 NUM_BANKS(ADDR_SURF_16_BANK)); 2942 2943 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2944 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2945 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2946 NUM_BANKS(ADDR_SURF_16_BANK)); 2947 2948 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2949 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2950 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2951 NUM_BANKS(ADDR_SURF_16_BANK)); 2952 2953 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2954 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 2955 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2956 NUM_BANKS(ADDR_SURF_16_BANK)); 2957 2958 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 2959 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2960 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2961 NUM_BANKS(ADDR_SURF_16_BANK)); 2962 2963 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2964 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 2965 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2966 NUM_BANKS(ADDR_SURF_16_BANK)); 2967 2968 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2969 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 2970 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 2971 NUM_BANKS(ADDR_SURF_16_BANK)); 2972 2973 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2974 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2976 NUM_BANKS(ADDR_SURF_16_BANK)); 2977 2978 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2979 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2980 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 2981 NUM_BANKS(ADDR_SURF_8_BANK)); 2982 2983 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 2984 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 2985 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 2986 NUM_BANKS(ADDR_SURF_4_BANK)); 2987 2988 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 2989 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 2990 2991 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 2992 if (reg_offset != 7) 2993 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 2994 2995 break; 2996 case CHIP_POLARIS10: 2997 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 2998 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 2999 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3000 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3001 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3002 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3003 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3004 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3005 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3006 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3007 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3008 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3009 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3011 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3012 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3013 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3014 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3015 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3016 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3017 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3018 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3019 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3020 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3021 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3022 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3023 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3024 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3025 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3026 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3027 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3028 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3029 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3030 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16)); 3031 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3033 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3035 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3037 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3038 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3039 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3040 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3041 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3042 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3043 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3044 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3045 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3046 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3047 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3048 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3049 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3051 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3052 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3053 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3054 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3055 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3056 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3057 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3058 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3059 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3060 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3061 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3062 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3063 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3064 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3065 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3066 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3067 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3068 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3069 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3070 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3071 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3072 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3073 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3074 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3075 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3076 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3077 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3079 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3080 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3081 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3082 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3083 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3084 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3085 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3086 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3087 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3088 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3089 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3090 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3091 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3092 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3093 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3094 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3095 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3096 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3097 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3098 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3099 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3100 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3101 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3102 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3103 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3104 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3105 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3106 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3107 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3108 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3109 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3110 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3111 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3112 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) | 3113 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3114 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3115 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3116 PIPE_CONFIG(ADDR_SURF_P4_16x16) | 3117 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3118 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3119 3120 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3121 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3122 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3123 NUM_BANKS(ADDR_SURF_16_BANK)); 3124 3125 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3126 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3127 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3128 NUM_BANKS(ADDR_SURF_16_BANK)); 3129 3130 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3131 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3132 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3133 NUM_BANKS(ADDR_SURF_16_BANK)); 3134 3135 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3138 NUM_BANKS(ADDR_SURF_16_BANK)); 3139 3140 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3141 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3142 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3143 NUM_BANKS(ADDR_SURF_16_BANK)); 3144 3145 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3146 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3147 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3148 NUM_BANKS(ADDR_SURF_16_BANK)); 3149 3150 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3151 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3152 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3153 NUM_BANKS(ADDR_SURF_16_BANK)); 3154 3155 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3158 NUM_BANKS(ADDR_SURF_16_BANK)); 3159 3160 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3161 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3162 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3163 NUM_BANKS(ADDR_SURF_16_BANK)); 3164 3165 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3166 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3167 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3168 NUM_BANKS(ADDR_SURF_16_BANK)); 3169 3170 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3173 NUM_BANKS(ADDR_SURF_16_BANK)); 3174 3175 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3176 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3177 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3178 NUM_BANKS(ADDR_SURF_8_BANK)); 3179 3180 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3181 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3182 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3183 NUM_BANKS(ADDR_SURF_4_BANK)); 3184 3185 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | 3188 NUM_BANKS(ADDR_SURF_4_BANK)); 3189 3190 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3191 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3192 3193 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3194 if (reg_offset != 7) 3195 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3196 3197 break; 3198 case CHIP_STONEY: 3199 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3200 PIPE_CONFIG(ADDR_SURF_P2) | 3201 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3202 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3203 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3204 PIPE_CONFIG(ADDR_SURF_P2) | 3205 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3206 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3207 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3208 PIPE_CONFIG(ADDR_SURF_P2) | 3209 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3210 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3211 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3212 PIPE_CONFIG(ADDR_SURF_P2) | 3213 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3214 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3215 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3216 PIPE_CONFIG(ADDR_SURF_P2) | 3217 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3218 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3219 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3220 PIPE_CONFIG(ADDR_SURF_P2) | 3221 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3222 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3223 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3224 PIPE_CONFIG(ADDR_SURF_P2) | 3225 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3226 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3227 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3228 PIPE_CONFIG(ADDR_SURF_P2)); 3229 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3230 PIPE_CONFIG(ADDR_SURF_P2) | 3231 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3233 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3234 PIPE_CONFIG(ADDR_SURF_P2) | 3235 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3237 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3238 PIPE_CONFIG(ADDR_SURF_P2) | 3239 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3241 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3242 PIPE_CONFIG(ADDR_SURF_P2) | 3243 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3245 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3246 PIPE_CONFIG(ADDR_SURF_P2) | 3247 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3249 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3250 PIPE_CONFIG(ADDR_SURF_P2) | 3251 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3253 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3254 PIPE_CONFIG(ADDR_SURF_P2) | 3255 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3256 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3257 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3258 PIPE_CONFIG(ADDR_SURF_P2) | 3259 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3260 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3261 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3262 PIPE_CONFIG(ADDR_SURF_P2) | 3263 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3265 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3266 PIPE_CONFIG(ADDR_SURF_P2) | 3267 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3268 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3269 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3270 PIPE_CONFIG(ADDR_SURF_P2) | 3271 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3272 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3273 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3274 PIPE_CONFIG(ADDR_SURF_P2) | 3275 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3276 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3277 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3278 PIPE_CONFIG(ADDR_SURF_P2) | 3279 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3280 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3281 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3282 PIPE_CONFIG(ADDR_SURF_P2) | 3283 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3284 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3285 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3286 PIPE_CONFIG(ADDR_SURF_P2) | 3287 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3288 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3289 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3290 PIPE_CONFIG(ADDR_SURF_P2) | 3291 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3292 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3293 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3294 PIPE_CONFIG(ADDR_SURF_P2) | 3295 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3296 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3297 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3298 PIPE_CONFIG(ADDR_SURF_P2) | 3299 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3300 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3301 3302 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3305 NUM_BANKS(ADDR_SURF_8_BANK)); 3306 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3307 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3308 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3309 NUM_BANKS(ADDR_SURF_8_BANK)); 3310 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3311 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3312 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3313 NUM_BANKS(ADDR_SURF_8_BANK)); 3314 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3315 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3316 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3317 NUM_BANKS(ADDR_SURF_8_BANK)); 3318 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3319 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3320 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3321 NUM_BANKS(ADDR_SURF_8_BANK)); 3322 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3323 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3324 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3325 NUM_BANKS(ADDR_SURF_8_BANK)); 3326 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3327 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3328 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3329 NUM_BANKS(ADDR_SURF_8_BANK)); 3330 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3331 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3332 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3333 NUM_BANKS(ADDR_SURF_16_BANK)); 3334 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3335 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3336 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3337 NUM_BANKS(ADDR_SURF_16_BANK)); 3338 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3339 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3340 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3341 NUM_BANKS(ADDR_SURF_16_BANK)); 3342 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3343 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3344 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3345 NUM_BANKS(ADDR_SURF_16_BANK)); 3346 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3347 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3348 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3349 NUM_BANKS(ADDR_SURF_16_BANK)); 3350 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3351 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3352 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3353 NUM_BANKS(ADDR_SURF_16_BANK)); 3354 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3355 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3356 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3357 NUM_BANKS(ADDR_SURF_8_BANK)); 3358 3359 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3360 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3361 reg_offset != 23) 3362 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3363 3364 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3365 if (reg_offset != 7) 3366 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3367 3368 break; 3369 default: 3370 dev_warn(adev->dev, 3371 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n", 3372 adev->asic_type); 3373 3374 case CHIP_CARRIZO: 3375 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3376 PIPE_CONFIG(ADDR_SURF_P2) | 3377 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 3378 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3379 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3380 PIPE_CONFIG(ADDR_SURF_P2) | 3381 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 3382 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3383 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3384 PIPE_CONFIG(ADDR_SURF_P2) | 3385 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 3386 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3387 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3388 PIPE_CONFIG(ADDR_SURF_P2) | 3389 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 3390 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3391 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3392 PIPE_CONFIG(ADDR_SURF_P2) | 3393 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3394 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3395 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3396 PIPE_CONFIG(ADDR_SURF_P2) | 3397 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3398 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3399 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3400 PIPE_CONFIG(ADDR_SURF_P2) | 3401 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | 3402 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); 3403 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 3404 PIPE_CONFIG(ADDR_SURF_P2)); 3405 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3406 PIPE_CONFIG(ADDR_SURF_P2) | 3407 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3409 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3410 PIPE_CONFIG(ADDR_SURF_P2) | 3411 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3412 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3413 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3414 PIPE_CONFIG(ADDR_SURF_P2) | 3415 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | 3416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3417 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3418 PIPE_CONFIG(ADDR_SURF_P2) | 3419 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3421 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3422 PIPE_CONFIG(ADDR_SURF_P2) | 3423 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3425 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | 3426 PIPE_CONFIG(ADDR_SURF_P2) | 3427 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3429 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3430 PIPE_CONFIG(ADDR_SURF_P2) | 3431 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3432 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3433 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3434 PIPE_CONFIG(ADDR_SURF_P2) | 3435 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3437 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | 3438 PIPE_CONFIG(ADDR_SURF_P2) | 3439 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3440 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3441 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3442 PIPE_CONFIG(ADDR_SURF_P2) | 3443 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3445 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | 3446 PIPE_CONFIG(ADDR_SURF_P2) | 3447 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3449 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | 3450 PIPE_CONFIG(ADDR_SURF_P2) | 3451 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3452 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3453 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | 3454 PIPE_CONFIG(ADDR_SURF_P2) | 3455 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | 3456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3457 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | 3458 PIPE_CONFIG(ADDR_SURF_P2) | 3459 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3461 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | 3462 PIPE_CONFIG(ADDR_SURF_P2) | 3463 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | 3464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); 3465 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 3466 PIPE_CONFIG(ADDR_SURF_P2) | 3467 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3469 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 3470 PIPE_CONFIG(ADDR_SURF_P2) | 3471 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); 3473 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | 3474 PIPE_CONFIG(ADDR_SURF_P2) | 3475 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | 3476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); 3477 3478 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3481 NUM_BANKS(ADDR_SURF_8_BANK)); 3482 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3485 NUM_BANKS(ADDR_SURF_8_BANK)); 3486 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3489 NUM_BANKS(ADDR_SURF_8_BANK)); 3490 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3493 NUM_BANKS(ADDR_SURF_8_BANK)); 3494 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3497 NUM_BANKS(ADDR_SURF_8_BANK)); 3498 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3500 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3501 NUM_BANKS(ADDR_SURF_8_BANK)); 3502 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3503 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3504 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3505 NUM_BANKS(ADDR_SURF_8_BANK)); 3506 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | 3508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3509 NUM_BANKS(ADDR_SURF_16_BANK)); 3510 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | 3511 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3512 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3513 NUM_BANKS(ADDR_SURF_16_BANK)); 3514 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3515 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 3516 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3517 NUM_BANKS(ADDR_SURF_16_BANK)); 3518 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 3519 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3520 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3521 NUM_BANKS(ADDR_SURF_16_BANK)); 3522 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3523 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 3524 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3525 NUM_BANKS(ADDR_SURF_16_BANK)); 3526 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | 3529 NUM_BANKS(ADDR_SURF_16_BANK)); 3530 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 3531 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 3532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | 3533 NUM_BANKS(ADDR_SURF_8_BANK)); 3534 3535 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 3536 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 && 3537 reg_offset != 23) 3538 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]); 3539 3540 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) 3541 if (reg_offset != 7) 3542 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]); 3543 3544 break; 3545 } 3546 } 3547 3548 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, 3549 u32 se_num, u32 sh_num, u32 instance) 3550 { 3551 u32 data; 3552 3553 if (instance == 0xffffffff) 3554 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); 3555 else 3556 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); 3557 3558 if (se_num == 0xffffffff) 3559 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1); 3560 else 3561 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num); 3562 3563 if (sh_num == 0xffffffff) 3564 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1); 3565 else 3566 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num); 3567 3568 WREG32(mmGRBM_GFX_INDEX, data); 3569 } 3570 3571 static void gfx_v8_0_select_me_pipe_q(struct amdgpu_device *adev, 3572 u32 me, u32 pipe, u32 q) 3573 { 3574 vi_srbm_select(adev, me, pipe, q, 0); 3575 } 3576 3577 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) 3578 { 3579 u32 data, mask; 3580 3581 data = RREG32(mmCC_RB_BACKEND_DISABLE) | 3582 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3583 3584 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); 3585 3586 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / 3587 adev->gfx.config.max_sh_per_se); 3588 3589 return (~data) & mask; 3590 } 3591 3592 static void 3593 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) 3594 { 3595 switch (adev->asic_type) { 3596 case CHIP_FIJI: 3597 case CHIP_VEGAM: 3598 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | 3599 RB_XSEL2(1) | PKR_MAP(2) | 3600 PKR_XSEL(1) | PKR_YSEL(1) | 3601 SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3); 3602 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | 3603 SE_PAIR_YSEL(2); 3604 break; 3605 case CHIP_TONGA: 3606 case CHIP_POLARIS10: 3607 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3608 SE_XSEL(1) | SE_YSEL(1); 3609 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) | 3610 SE_PAIR_YSEL(2); 3611 break; 3612 case CHIP_TOPAZ: 3613 case CHIP_CARRIZO: 3614 *rconf |= RB_MAP_PKR0(2); 3615 *rconf1 |= 0x0; 3616 break; 3617 case CHIP_POLARIS11: 3618 case CHIP_POLARIS12: 3619 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | 3620 SE_XSEL(1) | SE_YSEL(1); 3621 *rconf1 |= 0x0; 3622 break; 3623 case CHIP_STONEY: 3624 *rconf |= 0x0; 3625 *rconf1 |= 0x0; 3626 break; 3627 default: 3628 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); 3629 break; 3630 } 3631 } 3632 3633 static void 3634 gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev, 3635 u32 raster_config, u32 raster_config_1, 3636 unsigned rb_mask, unsigned num_rb) 3637 { 3638 unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); 3639 unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); 3640 unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); 3641 unsigned rb_per_se = num_rb / num_se; 3642 unsigned se_mask[4]; 3643 unsigned se; 3644 3645 se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; 3646 se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; 3647 se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; 3648 se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; 3649 3650 WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); 3651 WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); 3652 WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); 3653 3654 if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || 3655 (!se_mask[2] && !se_mask[3]))) { 3656 raster_config_1 &= ~SE_PAIR_MAP_MASK; 3657 3658 if (!se_mask[0] && !se_mask[1]) { 3659 raster_config_1 |= 3660 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); 3661 } else { 3662 raster_config_1 |= 3663 SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); 3664 } 3665 } 3666 3667 for (se = 0; se < num_se; se++) { 3668 unsigned raster_config_se = raster_config; 3669 unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); 3670 unsigned pkr1_mask = pkr0_mask << rb_per_pkr; 3671 int idx = (se / 2) * 2; 3672 3673 if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { 3674 raster_config_se &= ~SE_MAP_MASK; 3675 3676 if (!se_mask[idx]) { 3677 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); 3678 } else { 3679 raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); 3680 } 3681 } 3682 3683 pkr0_mask &= rb_mask; 3684 pkr1_mask &= rb_mask; 3685 if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { 3686 raster_config_se &= ~PKR_MAP_MASK; 3687 3688 if (!pkr0_mask) { 3689 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); 3690 } else { 3691 raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); 3692 } 3693 } 3694 3695 if (rb_per_se >= 2) { 3696 unsigned rb0_mask = 1 << (se * rb_per_se); 3697 unsigned rb1_mask = rb0_mask << 1; 3698 3699 rb0_mask &= rb_mask; 3700 rb1_mask &= rb_mask; 3701 if (!rb0_mask || !rb1_mask) { 3702 raster_config_se &= ~RB_MAP_PKR0_MASK; 3703 3704 if (!rb0_mask) { 3705 raster_config_se |= 3706 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); 3707 } else { 3708 raster_config_se |= 3709 RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); 3710 } 3711 } 3712 3713 if (rb_per_se > 2) { 3714 rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); 3715 rb1_mask = rb0_mask << 1; 3716 rb0_mask &= rb_mask; 3717 rb1_mask &= rb_mask; 3718 if (!rb0_mask || !rb1_mask) { 3719 raster_config_se &= ~RB_MAP_PKR1_MASK; 3720 3721 if (!rb0_mask) { 3722 raster_config_se |= 3723 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); 3724 } else { 3725 raster_config_se |= 3726 RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); 3727 } 3728 } 3729 } 3730 } 3731 3732 /* GRBM_GFX_INDEX has a different offset on VI */ 3733 gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); 3734 WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); 3735 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3736 } 3737 3738 /* GRBM_GFX_INDEX has a different offset on VI */ 3739 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3740 } 3741 3742 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) 3743 { 3744 int i, j; 3745 u32 data; 3746 u32 raster_config = 0, raster_config_1 = 0; 3747 u32 active_rbs = 0; 3748 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / 3749 adev->gfx.config.max_sh_per_se; 3750 unsigned num_rb_pipes; 3751 3752 mutex_lock(&adev->grbm_idx_mutex); 3753 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3754 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3755 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3756 data = gfx_v8_0_get_rb_active_bitmap(adev); 3757 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * 3758 rb_bitmap_width_per_sh); 3759 } 3760 } 3761 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3762 3763 adev->gfx.config.backend_enable_mask = active_rbs; 3764 adev->gfx.config.num_rbs = hweight32(active_rbs); 3765 3766 num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * 3767 adev->gfx.config.max_shader_engines, 16); 3768 3769 gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1); 3770 3771 if (!adev->gfx.config.backend_enable_mask || 3772 adev->gfx.config.num_rbs >= num_rb_pipes) { 3773 WREG32(mmPA_SC_RASTER_CONFIG, raster_config); 3774 WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); 3775 } else { 3776 gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1, 3777 adev->gfx.config.backend_enable_mask, 3778 num_rb_pipes); 3779 } 3780 3781 /* cache the values for userspace */ 3782 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3783 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3784 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3785 adev->gfx.config.rb_config[i][j].rb_backend_disable = 3786 RREG32(mmCC_RB_BACKEND_DISABLE); 3787 adev->gfx.config.rb_config[i][j].user_rb_backend_disable = 3788 RREG32(mmGC_USER_RB_BACKEND_DISABLE); 3789 adev->gfx.config.rb_config[i][j].raster_config = 3790 RREG32(mmPA_SC_RASTER_CONFIG); 3791 adev->gfx.config.rb_config[i][j].raster_config_1 = 3792 RREG32(mmPA_SC_RASTER_CONFIG_1); 3793 } 3794 } 3795 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3796 mutex_unlock(&adev->grbm_idx_mutex); 3797 } 3798 3799 /** 3800 * gfx_v8_0_init_compute_vmid - gart enable 3801 * 3802 * @adev: amdgpu_device pointer 3803 * 3804 * Initialize compute vmid sh_mem registers 3805 * 3806 */ 3807 #define DEFAULT_SH_MEM_BASES (0x6000) 3808 #define FIRST_COMPUTE_VMID (8) 3809 #define LAST_COMPUTE_VMID (16) 3810 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) 3811 { 3812 int i; 3813 uint32_t sh_mem_config; 3814 uint32_t sh_mem_bases; 3815 3816 /* 3817 * Configure apertures: 3818 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) 3819 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) 3820 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) 3821 */ 3822 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16); 3823 3824 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 << 3825 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT | 3826 SH_MEM_ALIGNMENT_MODE_UNALIGNED << 3827 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | 3828 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | 3829 SH_MEM_CONFIG__PRIVATE_ATC_MASK; 3830 3831 mutex_lock(&adev->srbm_mutex); 3832 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { 3833 vi_srbm_select(adev, 0, 0, 0, i); 3834 /* CP and shaders */ 3835 WREG32(mmSH_MEM_CONFIG, sh_mem_config); 3836 WREG32(mmSH_MEM_APE1_BASE, 1); 3837 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3838 WREG32(mmSH_MEM_BASES, sh_mem_bases); 3839 } 3840 vi_srbm_select(adev, 0, 0, 0, 0); 3841 mutex_unlock(&adev->srbm_mutex); 3842 } 3843 3844 static void gfx_v8_0_config_init(struct amdgpu_device *adev) 3845 { 3846 switch (adev->asic_type) { 3847 default: 3848 adev->gfx.config.double_offchip_lds_buf = 1; 3849 break; 3850 case CHIP_CARRIZO: 3851 case CHIP_STONEY: 3852 adev->gfx.config.double_offchip_lds_buf = 0; 3853 break; 3854 } 3855 } 3856 3857 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) 3858 { 3859 u32 tmp, sh_static_mem_cfg; 3860 int i; 3861 3862 WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF); 3863 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3864 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); 3865 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); 3866 3867 gfx_v8_0_tiling_mode_table_init(adev); 3868 gfx_v8_0_setup_rb(adev); 3869 gfx_v8_0_get_cu_info(adev); 3870 gfx_v8_0_config_init(adev); 3871 3872 /* XXX SH_MEM regs */ 3873 /* where to put LDS, scratch, GPUVM in FSA64 space */ 3874 sh_static_mem_cfg = REG_SET_FIELD(0, SH_STATIC_MEM_CONFIG, 3875 SWIZZLE_ENABLE, 1); 3876 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3877 ELEMENT_SIZE, 1); 3878 sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG, 3879 INDEX_STRIDE, 3); 3880 WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg); 3881 3882 mutex_lock(&adev->srbm_mutex); 3883 for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) { 3884 vi_srbm_select(adev, 0, 0, 0, i); 3885 /* CP and shaders */ 3886 if (i == 0) { 3887 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC); 3888 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3889 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3890 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3891 WREG32(mmSH_MEM_CONFIG, tmp); 3892 WREG32(mmSH_MEM_BASES, 0); 3893 } else { 3894 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC); 3895 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC); 3896 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE, 3897 SH_MEM_ALIGNMENT_MODE_UNALIGNED); 3898 WREG32(mmSH_MEM_CONFIG, tmp); 3899 tmp = adev->gmc.shared_aperture_start >> 48; 3900 WREG32(mmSH_MEM_BASES, tmp); 3901 } 3902 3903 WREG32(mmSH_MEM_APE1_BASE, 1); 3904 WREG32(mmSH_MEM_APE1_LIMIT, 0); 3905 } 3906 vi_srbm_select(adev, 0, 0, 0, 0); 3907 mutex_unlock(&adev->srbm_mutex); 3908 3909 gfx_v8_0_init_compute_vmid(adev); 3910 3911 mutex_lock(&adev->grbm_idx_mutex); 3912 /* 3913 * making sure that the following register writes will be broadcasted 3914 * to all the shaders 3915 */ 3916 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3917 3918 WREG32(mmPA_SC_FIFO_SIZE, 3919 (adev->gfx.config.sc_prim_fifo_size_frontend << 3920 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) | 3921 (adev->gfx.config.sc_prim_fifo_size_backend << 3922 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) | 3923 (adev->gfx.config.sc_hiz_tile_fifo_size << 3924 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | 3925 (adev->gfx.config.sc_earlyz_tile_fifo_size << 3926 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); 3927 3928 tmp = RREG32(mmSPI_ARB_PRIORITY); 3929 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); 3930 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); 3931 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); 3932 tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); 3933 WREG32(mmSPI_ARB_PRIORITY, tmp); 3934 3935 mutex_unlock(&adev->grbm_idx_mutex); 3936 3937 } 3938 3939 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev) 3940 { 3941 u32 i, j, k; 3942 u32 mask; 3943 3944 mutex_lock(&adev->grbm_idx_mutex); 3945 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 3946 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 3947 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 3948 for (k = 0; k < adev->usec_timeout; k++) { 3949 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) 3950 break; 3951 udelay(1); 3952 } 3953 if (k == adev->usec_timeout) { 3954 gfx_v8_0_select_se_sh(adev, 0xffffffff, 3955 0xffffffff, 0xffffffff); 3956 mutex_unlock(&adev->grbm_idx_mutex); 3957 DRM_INFO("Timeout wait for RLC serdes %u,%u\n", 3958 i, j); 3959 return; 3960 } 3961 } 3962 } 3963 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 3964 mutex_unlock(&adev->grbm_idx_mutex); 3965 3966 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | 3967 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK | 3968 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK | 3969 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK; 3970 for (k = 0; k < adev->usec_timeout; k++) { 3971 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0) 3972 break; 3973 udelay(1); 3974 } 3975 } 3976 3977 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, 3978 bool enable) 3979 { 3980 u32 tmp = RREG32(mmCP_INT_CNTL_RING0); 3981 3982 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); 3983 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); 3984 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0); 3985 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0); 3986 3987 WREG32(mmCP_INT_CNTL_RING0, tmp); 3988 } 3989 3990 static void gfx_v8_0_init_csb(struct amdgpu_device *adev) 3991 { 3992 /* csib */ 3993 WREG32(mmRLC_CSIB_ADDR_HI, 3994 adev->gfx.rlc.clear_state_gpu_addr >> 32); 3995 WREG32(mmRLC_CSIB_ADDR_LO, 3996 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); 3997 WREG32(mmRLC_CSIB_LENGTH, 3998 adev->gfx.rlc.clear_state_size); 3999 } 4000 4001 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format, 4002 int ind_offset, 4003 int list_size, 4004 int *unique_indices, 4005 int *indices_count, 4006 int max_indices, 4007 int *ind_start_offsets, 4008 int *offset_count, 4009 int max_offset) 4010 { 4011 int indices; 4012 bool new_entry = true; 4013 4014 for (; ind_offset < list_size; ind_offset++) { 4015 4016 if (new_entry) { 4017 new_entry = false; 4018 ind_start_offsets[*offset_count] = ind_offset; 4019 *offset_count = *offset_count + 1; 4020 BUG_ON(*offset_count >= max_offset); 4021 } 4022 4023 if (register_list_format[ind_offset] == 0xFFFFFFFF) { 4024 new_entry = true; 4025 continue; 4026 } 4027 4028 ind_offset += 2; 4029 4030 /* look for the matching indice */ 4031 for (indices = 0; 4032 indices < *indices_count; 4033 indices++) { 4034 if (unique_indices[indices] == 4035 register_list_format[ind_offset]) 4036 break; 4037 } 4038 4039 if (indices >= *indices_count) { 4040 unique_indices[*indices_count] = 4041 register_list_format[ind_offset]; 4042 indices = *indices_count; 4043 *indices_count = *indices_count + 1; 4044 BUG_ON(*indices_count >= max_indices); 4045 } 4046 4047 register_list_format[ind_offset] = indices; 4048 } 4049 } 4050 4051 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) 4052 { 4053 int i, temp, data; 4054 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0}; 4055 int indices_count = 0; 4056 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; 4057 int offset_count = 0; 4058 4059 int list_size; 4060 unsigned int *register_list_format = 4061 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); 4062 if (!register_list_format) 4063 return -ENOMEM; 4064 memcpy(register_list_format, adev->gfx.rlc.register_list_format, 4065 adev->gfx.rlc.reg_list_format_size_bytes); 4066 4067 gfx_v8_0_parse_ind_reg_list(register_list_format, 4068 RLC_FormatDirectRegListLength, 4069 adev->gfx.rlc.reg_list_format_size_bytes >> 2, 4070 unique_indices, 4071 &indices_count, 4072 ARRAY_SIZE(unique_indices), 4073 indirect_start_offsets, 4074 &offset_count, 4075 ARRAY_SIZE(indirect_start_offsets)); 4076 4077 /* save and restore list */ 4078 WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1); 4079 4080 WREG32(mmRLC_SRM_ARAM_ADDR, 0); 4081 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) 4082 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]); 4083 4084 /* indirect list */ 4085 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start); 4086 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) 4087 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]); 4088 4089 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; 4090 list_size = list_size >> 1; 4091 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size); 4092 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size); 4093 4094 /* starting offsets starts */ 4095 WREG32(mmRLC_GPM_SCRATCH_ADDR, 4096 adev->gfx.rlc.starting_offsets_start); 4097 for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) 4098 WREG32(mmRLC_GPM_SCRATCH_DATA, 4099 indirect_start_offsets[i]); 4100 4101 /* unique indices */ 4102 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; 4103 data = mmRLC_SRM_INDEX_CNTL_DATA_0; 4104 for (i = 0; i < ARRAY_SIZE(unique_indices); i++) { 4105 if (unique_indices[i] != 0) { 4106 WREG32(temp + i, unique_indices[i] & 0x3FFFF); 4107 WREG32(data + i, unique_indices[i] >> 20); 4108 } 4109 } 4110 kfree(register_list_format); 4111 4112 return 0; 4113 } 4114 4115 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev) 4116 { 4117 WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1); 4118 } 4119 4120 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev) 4121 { 4122 uint32_t data; 4123 4124 WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60); 4125 4126 data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10); 4127 data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10); 4128 data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10); 4129 data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10); 4130 WREG32(mmRLC_PG_DELAY, data); 4131 4132 WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3); 4133 WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0); 4134 4135 } 4136 4137 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, 4138 bool enable) 4139 { 4140 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0); 4141 } 4142 4143 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, 4144 bool enable) 4145 { 4146 WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0); 4147 } 4148 4149 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable) 4150 { 4151 WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1); 4152 } 4153 4154 static void gfx_v8_0_init_pg(struct amdgpu_device *adev) 4155 { 4156 if ((adev->asic_type == CHIP_CARRIZO) || 4157 (adev->asic_type == CHIP_STONEY)) { 4158 gfx_v8_0_init_csb(adev); 4159 gfx_v8_0_init_save_restore_list(adev); 4160 gfx_v8_0_enable_save_restore_machine(adev); 4161 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); 4162 gfx_v8_0_init_power_gating(adev); 4163 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); 4164 } else if ((adev->asic_type == CHIP_POLARIS11) || 4165 (adev->asic_type == CHIP_POLARIS12) || 4166 (adev->asic_type == CHIP_VEGAM)) { 4167 gfx_v8_0_init_csb(adev); 4168 gfx_v8_0_init_save_restore_list(adev); 4169 gfx_v8_0_enable_save_restore_machine(adev); 4170 gfx_v8_0_init_power_gating(adev); 4171 } 4172 4173 } 4174 4175 static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) 4176 { 4177 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0); 4178 4179 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 4180 gfx_v8_0_wait_for_rlc_serdes(adev); 4181 } 4182 4183 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev) 4184 { 4185 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 4186 udelay(50); 4187 4188 WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); 4189 udelay(50); 4190 } 4191 4192 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) 4193 { 4194 WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1); 4195 4196 /* carrizo do enable cp interrupt after cp inited */ 4197 if (!(adev->flags & AMD_IS_APU)) 4198 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 4199 4200 udelay(50); 4201 } 4202 4203 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) 4204 { 4205 const struct rlc_firmware_header_v2_0 *hdr; 4206 const __le32 *fw_data; 4207 unsigned i, fw_size; 4208 4209 if (!adev->gfx.rlc_fw) 4210 return -EINVAL; 4211 4212 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; 4213 amdgpu_ucode_print_rlc_hdr(&hdr->header); 4214 4215 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + 4216 le32_to_cpu(hdr->header.ucode_array_offset_bytes)); 4217 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; 4218 4219 WREG32(mmRLC_GPM_UCODE_ADDR, 0); 4220 for (i = 0; i < fw_size; i++) 4221 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); 4222 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); 4223 4224 return 0; 4225 } 4226 4227 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) 4228 { 4229 int r; 4230 u32 tmp; 4231 4232 gfx_v8_0_rlc_stop(adev); 4233 4234 /* disable CG */ 4235 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL); 4236 tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 4237 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 4238 WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); 4239 if (adev->asic_type == CHIP_POLARIS11 || 4240 adev->asic_type == CHIP_POLARIS10 || 4241 adev->asic_type == CHIP_POLARIS12 || 4242 adev->asic_type == CHIP_VEGAM) { 4243 tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); 4244 tmp &= ~0x3; 4245 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); 4246 } 4247 4248 /* disable PG */ 4249 WREG32(mmRLC_PG_CNTL, 0); 4250 4251 gfx_v8_0_rlc_reset(adev); 4252 gfx_v8_0_init_pg(adev); 4253 4254 4255 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 4256 /* legacy rlc firmware loading */ 4257 r = gfx_v8_0_rlc_load_microcode(adev); 4258 if (r) 4259 return r; 4260 } 4261 4262 gfx_v8_0_rlc_start(adev); 4263 4264 return 0; 4265 } 4266 4267 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) 4268 { 4269 int i; 4270 u32 tmp = RREG32(mmCP_ME_CNTL); 4271 4272 if (enable) { 4273 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0); 4274 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0); 4275 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0); 4276 } else { 4277 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); 4278 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); 4279 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); 4280 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 4281 adev->gfx.gfx_ring[i].ready = false; 4282 } 4283 WREG32(mmCP_ME_CNTL, tmp); 4284 udelay(50); 4285 } 4286 4287 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) 4288 { 4289 const struct gfx_firmware_header_v1_0 *pfp_hdr; 4290 const struct gfx_firmware_header_v1_0 *ce_hdr; 4291 const struct gfx_firmware_header_v1_0 *me_hdr; 4292 const __le32 *fw_data; 4293 unsigned i, fw_size; 4294 4295 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) 4296 return -EINVAL; 4297 4298 pfp_hdr = (const struct gfx_firmware_header_v1_0 *) 4299 adev->gfx.pfp_fw->data; 4300 ce_hdr = (const struct gfx_firmware_header_v1_0 *) 4301 adev->gfx.ce_fw->data; 4302 me_hdr = (const struct gfx_firmware_header_v1_0 *) 4303 adev->gfx.me_fw->data; 4304 4305 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); 4306 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); 4307 amdgpu_ucode_print_gfx_hdr(&me_hdr->header); 4308 4309 gfx_v8_0_cp_gfx_enable(adev, false); 4310 4311 /* PFP */ 4312 fw_data = (const __le32 *) 4313 (adev->gfx.pfp_fw->data + 4314 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); 4315 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; 4316 WREG32(mmCP_PFP_UCODE_ADDR, 0); 4317 for (i = 0; i < fw_size; i++) 4318 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); 4319 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); 4320 4321 /* CE */ 4322 fw_data = (const __le32 *) 4323 (adev->gfx.ce_fw->data + 4324 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); 4325 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; 4326 WREG32(mmCP_CE_UCODE_ADDR, 0); 4327 for (i = 0; i < fw_size; i++) 4328 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); 4329 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); 4330 4331 /* ME */ 4332 fw_data = (const __le32 *) 4333 (adev->gfx.me_fw->data + 4334 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); 4335 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; 4336 WREG32(mmCP_ME_RAM_WADDR, 0); 4337 for (i = 0; i < fw_size; i++) 4338 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); 4339 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); 4340 4341 return 0; 4342 } 4343 4344 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) 4345 { 4346 u32 count = 0; 4347 const struct cs_section_def *sect = NULL; 4348 const struct cs_extent_def *ext = NULL; 4349 4350 /* begin clear state */ 4351 count += 2; 4352 /* context control state */ 4353 count += 3; 4354 4355 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4356 for (ext = sect->section; ext->extent != NULL; ++ext) { 4357 if (sect->id == SECT_CONTEXT) 4358 count += 2 + ext->reg_count; 4359 else 4360 return 0; 4361 } 4362 } 4363 /* pa_sc_raster_config/pa_sc_raster_config1 */ 4364 count += 4; 4365 /* end clear state */ 4366 count += 2; 4367 /* clear state */ 4368 count += 2; 4369 4370 return count; 4371 } 4372 4373 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) 4374 { 4375 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; 4376 const struct cs_section_def *sect = NULL; 4377 const struct cs_extent_def *ext = NULL; 4378 int r, i; 4379 4380 /* init the CP */ 4381 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1); 4382 WREG32(mmCP_ENDIAN_SWAP, 0); 4383 WREG32(mmCP_DEVICE_ID, 1); 4384 4385 gfx_v8_0_cp_gfx_enable(adev, true); 4386 4387 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); 4388 if (r) { 4389 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); 4390 return r; 4391 } 4392 4393 /* clear state buffer */ 4394 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4395 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); 4396 4397 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 4398 amdgpu_ring_write(ring, 0x80000000); 4399 amdgpu_ring_write(ring, 0x80000000); 4400 4401 for (sect = vi_cs_data; sect->section != NULL; ++sect) { 4402 for (ext = sect->section; ext->extent != NULL; ++ext) { 4403 if (sect->id == SECT_CONTEXT) { 4404 amdgpu_ring_write(ring, 4405 PACKET3(PACKET3_SET_CONTEXT_REG, 4406 ext->reg_count)); 4407 amdgpu_ring_write(ring, 4408 ext->reg_index - PACKET3_SET_CONTEXT_REG_START); 4409 for (i = 0; i < ext->reg_count; i++) 4410 amdgpu_ring_write(ring, ext->extent[i]); 4411 } 4412 } 4413 } 4414 4415 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2)); 4416 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START); 4417 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config); 4418 amdgpu_ring_write(ring, adev->gfx.config.rb_config[0][0].raster_config_1); 4419 4420 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0)); 4421 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE); 4422 4423 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0)); 4424 amdgpu_ring_write(ring, 0); 4425 4426 /* init the CE partitions */ 4427 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2)); 4428 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); 4429 amdgpu_ring_write(ring, 0x8000); 4430 amdgpu_ring_write(ring, 0x8000); 4431 4432 amdgpu_ring_commit(ring); 4433 4434 return 0; 4435 } 4436 static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring) 4437 { 4438 u32 tmp; 4439 /* no gfx doorbells on iceland */ 4440 if (adev->asic_type == CHIP_TOPAZ) 4441 return; 4442 4443 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); 4444 4445 if (ring->use_doorbell) { 4446 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4447 DOORBELL_OFFSET, ring->doorbell_index); 4448 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4449 DOORBELL_HIT, 0); 4450 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, 4451 DOORBELL_EN, 1); 4452 } else { 4453 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); 4454 } 4455 4456 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); 4457 4458 if (adev->flags & AMD_IS_APU) 4459 return; 4460 4461 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, 4462 DOORBELL_RANGE_LOWER, 4463 AMDGPU_DOORBELL_GFX_RING0); 4464 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); 4465 4466 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, 4467 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); 4468 } 4469 4470 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) 4471 { 4472 struct amdgpu_ring *ring; 4473 u32 tmp; 4474 u32 rb_bufsz; 4475 u64 rb_addr, rptr_addr, wptr_gpu_addr; 4476 int r; 4477 4478 /* Set the write pointer delay */ 4479 WREG32(mmCP_RB_WPTR_DELAY, 0); 4480 4481 /* set the RB to use vmid 0 */ 4482 WREG32(mmCP_RB_VMID, 0); 4483 4484 /* Set ring buffer size */ 4485 ring = &adev->gfx.gfx_ring[0]; 4486 rb_bufsz = order_base_2(ring->ring_size / 8); 4487 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz); 4488 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2); 4489 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3); 4490 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1); 4491 #ifdef __BIG_ENDIAN 4492 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1); 4493 #endif 4494 WREG32(mmCP_RB0_CNTL, tmp); 4495 4496 /* Initialize the ring buffer's read and write pointers */ 4497 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK); 4498 ring->wptr = 0; 4499 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 4500 4501 /* set the wb address wether it's enabled or not */ 4502 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4503 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr)); 4504 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF); 4505 4506 wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4507 WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr)); 4508 WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr)); 4509 mdelay(1); 4510 WREG32(mmCP_RB0_CNTL, tmp); 4511 4512 rb_addr = ring->gpu_addr >> 8; 4513 WREG32(mmCP_RB0_BASE, rb_addr); 4514 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); 4515 4516 gfx_v8_0_set_cpg_door_bell(adev, ring); 4517 /* start the ring */ 4518 amdgpu_ring_clear_ring(ring); 4519 gfx_v8_0_cp_gfx_start(adev); 4520 ring->ready = true; 4521 r = amdgpu_ring_test_ring(ring); 4522 if (r) 4523 ring->ready = false; 4524 4525 return r; 4526 } 4527 4528 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) 4529 { 4530 int i; 4531 4532 if (enable) { 4533 WREG32(mmCP_MEC_CNTL, 0); 4534 } else { 4535 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); 4536 for (i = 0; i < adev->gfx.num_compute_rings; i++) 4537 adev->gfx.compute_ring[i].ready = false; 4538 adev->gfx.kiq.ring.ready = false; 4539 } 4540 udelay(50); 4541 } 4542 4543 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) 4544 { 4545 const struct gfx_firmware_header_v1_0 *mec_hdr; 4546 const __le32 *fw_data; 4547 unsigned i, fw_size; 4548 4549 if (!adev->gfx.mec_fw) 4550 return -EINVAL; 4551 4552 gfx_v8_0_cp_compute_enable(adev, false); 4553 4554 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; 4555 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); 4556 4557 fw_data = (const __le32 *) 4558 (adev->gfx.mec_fw->data + 4559 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); 4560 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; 4561 4562 /* MEC1 */ 4563 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); 4564 for (i = 0; i < fw_size; i++) 4565 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); 4566 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); 4567 4568 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ 4569 if (adev->gfx.mec2_fw) { 4570 const struct gfx_firmware_header_v1_0 *mec2_hdr; 4571 4572 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; 4573 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); 4574 4575 fw_data = (const __le32 *) 4576 (adev->gfx.mec2_fw->data + 4577 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); 4578 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; 4579 4580 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); 4581 for (i = 0; i < fw_size; i++) 4582 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); 4583 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); 4584 } 4585 4586 return 0; 4587 } 4588 4589 /* KIQ functions */ 4590 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) 4591 { 4592 uint32_t tmp; 4593 struct amdgpu_device *adev = ring->adev; 4594 4595 /* tell RLC which is KIQ queue */ 4596 tmp = RREG32(mmRLC_CP_SCHEDULERS); 4597 tmp &= 0xffffff00; 4598 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); 4599 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4600 tmp |= 0x80; 4601 WREG32(mmRLC_CP_SCHEDULERS, tmp); 4602 } 4603 4604 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) 4605 { 4606 struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; 4607 uint32_t scratch, tmp = 0; 4608 uint64_t queue_mask = 0; 4609 int r, i; 4610 4611 for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { 4612 if (!test_bit(i, adev->gfx.mec.queue_bitmap)) 4613 continue; 4614 4615 /* This situation may be hit in the future if a new HW 4616 * generation exposes more than 64 queues. If so, the 4617 * definition of queue_mask needs updating */ 4618 if (WARN_ON(i >= (sizeof(queue_mask)*8))) { 4619 DRM_ERROR("Invalid KCQ enabled: %d\n", i); 4620 break; 4621 } 4622 4623 queue_mask |= (1ull << i); 4624 } 4625 4626 r = amdgpu_gfx_scratch_get(adev, &scratch); 4627 if (r) { 4628 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 4629 return r; 4630 } 4631 WREG32(scratch, 0xCAFEDEAD); 4632 4633 r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); 4634 if (r) { 4635 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 4636 amdgpu_gfx_scratch_free(adev, scratch); 4637 return r; 4638 } 4639 /* set resources */ 4640 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); 4641 amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ 4642 amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ 4643 amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ 4644 amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ 4645 amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ 4646 amdgpu_ring_write(kiq_ring, 0); /* oac mask */ 4647 amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ 4648 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4649 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 4650 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); 4651 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4652 4653 /* map queues */ 4654 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); 4655 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ 4656 amdgpu_ring_write(kiq_ring, 4657 PACKET3_MAP_QUEUES_NUM_QUEUES(1)); 4658 amdgpu_ring_write(kiq_ring, 4659 PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | 4660 PACKET3_MAP_QUEUES_QUEUE(ring->queue) | 4661 PACKET3_MAP_QUEUES_PIPE(ring->pipe) | 4662 PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */ 4663 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); 4664 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); 4665 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); 4666 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); 4667 } 4668 /* write to scratch for completion */ 4669 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 4670 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 4671 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 4672 amdgpu_ring_commit(kiq_ring); 4673 4674 for (i = 0; i < adev->usec_timeout; i++) { 4675 tmp = RREG32(scratch); 4676 if (tmp == 0xDEADBEEF) 4677 break; 4678 DRM_UDELAY(1); 4679 } 4680 if (i >= adev->usec_timeout) { 4681 DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", 4682 scratch, tmp); 4683 r = -EINVAL; 4684 } 4685 amdgpu_gfx_scratch_free(adev, scratch); 4686 4687 return r; 4688 } 4689 4690 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) 4691 { 4692 int i, r = 0; 4693 4694 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { 4695 WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); 4696 for (i = 0; i < adev->usec_timeout; i++) { 4697 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) 4698 break; 4699 udelay(1); 4700 } 4701 if (i == adev->usec_timeout) 4702 r = -ETIMEDOUT; 4703 } 4704 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); 4705 WREG32(mmCP_HQD_PQ_RPTR, 0); 4706 WREG32(mmCP_HQD_PQ_WPTR, 0); 4707 4708 return r; 4709 } 4710 4711 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) 4712 { 4713 struct amdgpu_device *adev = ring->adev; 4714 struct vi_mqd *mqd = ring->mqd_ptr; 4715 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4716 uint32_t tmp; 4717 4718 mqd->header = 0xC0310800; 4719 mqd->compute_pipelinestat_enable = 0x00000001; 4720 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4721 mqd->compute_static_thread_mgmt_se1 = 0xffffffff; 4722 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4723 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4724 mqd->compute_misc_reserved = 0x00000003; 4725 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr 4726 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4727 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr 4728 + offsetof(struct vi_mqd_allocation, dynamic_cu_mask)); 4729 eop_base_addr = ring->eop_gpu_addr >> 8; 4730 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4731 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4732 4733 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 4734 tmp = RREG32(mmCP_HQD_EOP_CONTROL); 4735 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, 4736 (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); 4737 4738 mqd->cp_hqd_eop_control = tmp; 4739 4740 /* enable doorbell? */ 4741 tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL), 4742 CP_HQD_PQ_DOORBELL_CONTROL, 4743 DOORBELL_EN, 4744 ring->use_doorbell ? 1 : 0); 4745 4746 mqd->cp_hqd_pq_doorbell_control = tmp; 4747 4748 /* set the pointer to the MQD */ 4749 mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; 4750 mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); 4751 4752 /* set MQD vmid to 0 */ 4753 tmp = RREG32(mmCP_MQD_CONTROL); 4754 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); 4755 mqd->cp_mqd_control = tmp; 4756 4757 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ 4758 hqd_gpu_addr = ring->gpu_addr >> 8; 4759 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; 4760 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); 4761 4762 /* set up the HQD, this is similar to CP_RB0_CNTL */ 4763 tmp = RREG32(mmCP_HQD_PQ_CONTROL); 4764 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, 4765 (order_base_2(ring->ring_size / 4) - 1)); 4766 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, 4767 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); 4768 #ifdef __BIG_ENDIAN 4769 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); 4770 #endif 4771 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); 4772 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); 4773 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); 4774 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); 4775 mqd->cp_hqd_pq_control = tmp; 4776 4777 /* set the wb address whether it's enabled or not */ 4778 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); 4779 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; 4780 mqd->cp_hqd_pq_rptr_report_addr_hi = 4781 upper_32_bits(wb_gpu_addr) & 0xffff; 4782 4783 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ 4784 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); 4785 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; 4786 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; 4787 4788 tmp = 0; 4789 /* enable the doorbell if requested */ 4790 if (ring->use_doorbell) { 4791 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); 4792 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4793 DOORBELL_OFFSET, ring->doorbell_index); 4794 4795 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4796 DOORBELL_EN, 1); 4797 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4798 DOORBELL_SOURCE, 0); 4799 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, 4800 DOORBELL_HIT, 0); 4801 } 4802 4803 mqd->cp_hqd_pq_doorbell_control = tmp; 4804 4805 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ 4806 ring->wptr = 0; 4807 mqd->cp_hqd_pq_wptr = ring->wptr; 4808 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); 4809 4810 /* set the vmid for the queue */ 4811 mqd->cp_hqd_vmid = 0; 4812 4813 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); 4814 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); 4815 mqd->cp_hqd_persistent_state = tmp; 4816 4817 /* set MTYPE */ 4818 tmp = RREG32(mmCP_HQD_IB_CONTROL); 4819 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); 4820 tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); 4821 mqd->cp_hqd_ib_control = tmp; 4822 4823 tmp = RREG32(mmCP_HQD_IQ_TIMER); 4824 tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); 4825 mqd->cp_hqd_iq_timer = tmp; 4826 4827 tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); 4828 tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); 4829 mqd->cp_hqd_ctx_save_control = tmp; 4830 4831 /* defaults */ 4832 mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); 4833 mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); 4834 mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); 4835 mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); 4836 mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); 4837 mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); 4838 mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); 4839 mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); 4840 mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); 4841 mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); 4842 mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); 4843 mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); 4844 mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); 4845 mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); 4846 mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); 4847 4848 /* activate the queue */ 4849 mqd->cp_hqd_active = 1; 4850 4851 return 0; 4852 } 4853 4854 int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, 4855 struct vi_mqd *mqd) 4856 { 4857 uint32_t mqd_reg; 4858 uint32_t *mqd_data; 4859 4860 /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ 4861 mqd_data = &mqd->cp_mqd_base_addr_lo; 4862 4863 /* disable wptr polling */ 4864 WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); 4865 4866 /* program all HQD registers */ 4867 for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) 4868 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4869 4870 /* Tonga errata: EOP RPTR/WPTR should be left unmodified. 4871 * This is safe since EOP RPTR==WPTR for any inactive HQD 4872 * on ASICs that do not support context-save. 4873 * EOP writes/reads can start anywhere in the ring. 4874 */ 4875 if (adev->asic_type != CHIP_TONGA) { 4876 WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); 4877 WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); 4878 WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); 4879 } 4880 4881 for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) 4882 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4883 4884 /* activate the HQD */ 4885 for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) 4886 WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); 4887 4888 return 0; 4889 } 4890 4891 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) 4892 { 4893 struct amdgpu_device *adev = ring->adev; 4894 struct vi_mqd *mqd = ring->mqd_ptr; 4895 int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; 4896 4897 gfx_v8_0_kiq_setting(ring); 4898 4899 if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4900 /* reset MQD to a clean status */ 4901 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4902 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4903 4904 /* reset ring buffer */ 4905 ring->wptr = 0; 4906 amdgpu_ring_clear_ring(ring); 4907 mutex_lock(&adev->srbm_mutex); 4908 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4909 gfx_v8_0_mqd_commit(adev, mqd); 4910 vi_srbm_select(adev, 0, 0, 0, 0); 4911 mutex_unlock(&adev->srbm_mutex); 4912 } else { 4913 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4914 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4915 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4916 mutex_lock(&adev->srbm_mutex); 4917 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4918 gfx_v8_0_mqd_init(ring); 4919 gfx_v8_0_mqd_commit(adev, mqd); 4920 vi_srbm_select(adev, 0, 0, 0, 0); 4921 mutex_unlock(&adev->srbm_mutex); 4922 4923 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4924 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4925 } 4926 4927 return 0; 4928 } 4929 4930 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) 4931 { 4932 struct amdgpu_device *adev = ring->adev; 4933 struct vi_mqd *mqd = ring->mqd_ptr; 4934 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4935 4936 if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { 4937 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); 4938 ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; 4939 ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; 4940 mutex_lock(&adev->srbm_mutex); 4941 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4942 gfx_v8_0_mqd_init(ring); 4943 vi_srbm_select(adev, 0, 0, 0, 0); 4944 mutex_unlock(&adev->srbm_mutex); 4945 4946 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4947 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); 4948 } else if (adev->in_gpu_reset) { /* for GPU_RESET case */ 4949 /* reset MQD to a clean status */ 4950 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4951 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); 4952 /* reset ring buffer */ 4953 ring->wptr = 0; 4954 amdgpu_ring_clear_ring(ring); 4955 } else { 4956 amdgpu_ring_clear_ring(ring); 4957 } 4958 return 0; 4959 } 4960 4961 static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) 4962 { 4963 if (adev->asic_type > CHIP_TONGA) { 4964 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); 4965 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); 4966 } 4967 /* enable doorbells */ 4968 WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); 4969 } 4970 4971 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) 4972 { 4973 struct amdgpu_ring *ring = NULL; 4974 int r = 0, i; 4975 4976 gfx_v8_0_cp_compute_enable(adev, true); 4977 4978 ring = &adev->gfx.kiq.ring; 4979 4980 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4981 if (unlikely(r != 0)) 4982 goto done; 4983 4984 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 4985 if (!r) { 4986 r = gfx_v8_0_kiq_init_queue(ring); 4987 amdgpu_bo_kunmap(ring->mqd_obj); 4988 ring->mqd_ptr = NULL; 4989 } 4990 amdgpu_bo_unreserve(ring->mqd_obj); 4991 if (r) 4992 goto done; 4993 4994 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 4995 ring = &adev->gfx.compute_ring[i]; 4996 4997 r = amdgpu_bo_reserve(ring->mqd_obj, false); 4998 if (unlikely(r != 0)) 4999 goto done; 5000 r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); 5001 if (!r) { 5002 r = gfx_v8_0_kcq_init_queue(ring); 5003 amdgpu_bo_kunmap(ring->mqd_obj); 5004 ring->mqd_ptr = NULL; 5005 } 5006 amdgpu_bo_unreserve(ring->mqd_obj); 5007 if (r) 5008 goto done; 5009 } 5010 5011 gfx_v8_0_set_mec_doorbell_range(adev); 5012 5013 r = gfx_v8_0_kiq_kcq_enable(adev); 5014 if (r) 5015 goto done; 5016 5017 /* Test KIQ */ 5018 ring = &adev->gfx.kiq.ring; 5019 ring->ready = true; 5020 r = amdgpu_ring_test_ring(ring); 5021 if (r) { 5022 ring->ready = false; 5023 goto done; 5024 } 5025 5026 /* Test KCQs */ 5027 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5028 ring = &adev->gfx.compute_ring[i]; 5029 ring->ready = true; 5030 r = amdgpu_ring_test_ring(ring); 5031 if (r) 5032 ring->ready = false; 5033 } 5034 5035 done: 5036 return r; 5037 } 5038 5039 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) 5040 { 5041 int r; 5042 5043 if (!(adev->flags & AMD_IS_APU)) 5044 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 5045 5046 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { 5047 /* legacy firmware loading */ 5048 r = gfx_v8_0_cp_gfx_load_microcode(adev); 5049 if (r) 5050 return r; 5051 5052 r = gfx_v8_0_cp_compute_load_microcode(adev); 5053 if (r) 5054 return r; 5055 } 5056 5057 r = gfx_v8_0_cp_gfx_resume(adev); 5058 if (r) 5059 return r; 5060 5061 r = gfx_v8_0_kiq_resume(adev); 5062 if (r) 5063 return r; 5064 5065 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 5066 5067 return 0; 5068 } 5069 5070 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable) 5071 { 5072 gfx_v8_0_cp_gfx_enable(adev, enable); 5073 gfx_v8_0_cp_compute_enable(adev, enable); 5074 } 5075 5076 static int gfx_v8_0_hw_init(void *handle) 5077 { 5078 int r; 5079 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5080 5081 gfx_v8_0_init_golden_registers(adev); 5082 gfx_v8_0_gpu_init(adev); 5083 5084 r = gfx_v8_0_rlc_resume(adev); 5085 if (r) 5086 return r; 5087 5088 r = gfx_v8_0_cp_resume(adev); 5089 5090 return r; 5091 } 5092 5093 static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring) 5094 { 5095 struct amdgpu_device *adev = kiq_ring->adev; 5096 uint32_t scratch, tmp = 0; 5097 int r, i; 5098 5099 r = amdgpu_gfx_scratch_get(adev, &scratch); 5100 if (r) { 5101 DRM_ERROR("Failed to get scratch reg (%d).\n", r); 5102 return r; 5103 } 5104 WREG32(scratch, 0xCAFEDEAD); 5105 5106 r = amdgpu_ring_alloc(kiq_ring, 10); 5107 if (r) { 5108 DRM_ERROR("Failed to lock KIQ (%d).\n", r); 5109 amdgpu_gfx_scratch_free(adev, scratch); 5110 return r; 5111 } 5112 5113 /* unmap queues */ 5114 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); 5115 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ 5116 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ 5117 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | 5118 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | 5119 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); 5120 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); 5121 amdgpu_ring_write(kiq_ring, 0); 5122 amdgpu_ring_write(kiq_ring, 0); 5123 amdgpu_ring_write(kiq_ring, 0); 5124 /* write to scratch for completion */ 5125 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); 5126 amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); 5127 amdgpu_ring_write(kiq_ring, 0xDEADBEEF); 5128 amdgpu_ring_commit(kiq_ring); 5129 5130 for (i = 0; i < adev->usec_timeout; i++) { 5131 tmp = RREG32(scratch); 5132 if (tmp == 0xDEADBEEF) 5133 break; 5134 DRM_UDELAY(1); 5135 } 5136 if (i >= adev->usec_timeout) { 5137 DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); 5138 r = -EINVAL; 5139 } 5140 amdgpu_gfx_scratch_free(adev, scratch); 5141 return r; 5142 } 5143 5144 static int gfx_v8_0_hw_fini(void *handle) 5145 { 5146 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5147 int i; 5148 5149 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5150 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5151 5152 amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); 5153 5154 amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); 5155 5156 /* disable KCQ to avoid CPC touch memory not valid anymore */ 5157 for (i = 0; i < adev->gfx.num_compute_rings; i++) 5158 gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); 5159 5160 if (amdgpu_sriov_vf(adev)) { 5161 pr_debug("For SRIOV client, shouldn't do anything.\n"); 5162 return 0; 5163 } 5164 gfx_v8_0_cp_enable(adev, false); 5165 gfx_v8_0_rlc_stop(adev); 5166 5167 amdgpu_device_ip_set_powergating_state(adev, 5168 AMD_IP_BLOCK_TYPE_GFX, 5169 AMD_PG_STATE_UNGATE); 5170 5171 return 0; 5172 } 5173 5174 static int gfx_v8_0_suspend(void *handle) 5175 { 5176 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5177 adev->gfx.in_suspend = true; 5178 return gfx_v8_0_hw_fini(adev); 5179 } 5180 5181 static int gfx_v8_0_resume(void *handle) 5182 { 5183 int r; 5184 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5185 5186 r = gfx_v8_0_hw_init(adev); 5187 adev->gfx.in_suspend = false; 5188 return r; 5189 } 5190 5191 static bool gfx_v8_0_is_idle(void *handle) 5192 { 5193 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5194 5195 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) 5196 return false; 5197 else 5198 return true; 5199 } 5200 5201 static int gfx_v8_0_wait_for_idle(void *handle) 5202 { 5203 unsigned i; 5204 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5205 5206 for (i = 0; i < adev->usec_timeout; i++) { 5207 if (gfx_v8_0_is_idle(handle)) 5208 return 0; 5209 5210 udelay(1); 5211 } 5212 return -ETIMEDOUT; 5213 } 5214 5215 static bool gfx_v8_0_check_soft_reset(void *handle) 5216 { 5217 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5218 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5219 u32 tmp; 5220 5221 /* GRBM_STATUS */ 5222 tmp = RREG32(mmGRBM_STATUS); 5223 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | 5224 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | 5225 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | 5226 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | 5227 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | 5228 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK | 5229 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) { 5230 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5231 GRBM_SOFT_RESET, SOFT_RESET_CP, 1); 5232 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5233 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); 5234 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5235 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5236 } 5237 5238 /* GRBM_STATUS2 */ 5239 tmp = RREG32(mmGRBM_STATUS2); 5240 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY)) 5241 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, 5242 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); 5243 5244 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) || 5245 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) || 5246 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) { 5247 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5248 SOFT_RESET_CPF, 1); 5249 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5250 SOFT_RESET_CPC, 1); 5251 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, 5252 SOFT_RESET_CPG, 1); 5253 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, 5254 SOFT_RESET_GRBM, 1); 5255 } 5256 5257 /* SRBM_STATUS */ 5258 tmp = RREG32(mmSRBM_STATUS); 5259 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING)) 5260 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5261 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); 5262 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY)) 5263 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, 5264 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); 5265 5266 if (grbm_soft_reset || srbm_soft_reset) { 5267 adev->gfx.grbm_soft_reset = grbm_soft_reset; 5268 adev->gfx.srbm_soft_reset = srbm_soft_reset; 5269 return true; 5270 } else { 5271 adev->gfx.grbm_soft_reset = 0; 5272 adev->gfx.srbm_soft_reset = 0; 5273 return false; 5274 } 5275 } 5276 5277 static int gfx_v8_0_pre_soft_reset(void *handle) 5278 { 5279 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5280 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5281 5282 if ((!adev->gfx.grbm_soft_reset) && 5283 (!adev->gfx.srbm_soft_reset)) 5284 return 0; 5285 5286 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5287 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5288 5289 /* stop the rlc */ 5290 gfx_v8_0_rlc_stop(adev); 5291 5292 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5293 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5294 /* Disable GFX parsing/prefetching */ 5295 gfx_v8_0_cp_gfx_enable(adev, false); 5296 5297 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5298 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5299 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5300 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5301 int i; 5302 5303 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5304 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5305 5306 mutex_lock(&adev->srbm_mutex); 5307 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5308 gfx_v8_0_deactivate_hqd(adev, 2); 5309 vi_srbm_select(adev, 0, 0, 0, 0); 5310 mutex_unlock(&adev->srbm_mutex); 5311 } 5312 /* Disable MEC parsing/prefetching */ 5313 gfx_v8_0_cp_compute_enable(adev, false); 5314 } 5315 5316 return 0; 5317 } 5318 5319 static int gfx_v8_0_soft_reset(void *handle) 5320 { 5321 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5322 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5323 u32 tmp; 5324 5325 if ((!adev->gfx.grbm_soft_reset) && 5326 (!adev->gfx.srbm_soft_reset)) 5327 return 0; 5328 5329 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5330 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5331 5332 if (grbm_soft_reset || srbm_soft_reset) { 5333 tmp = RREG32(mmGMCON_DEBUG); 5334 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1); 5335 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1); 5336 WREG32(mmGMCON_DEBUG, tmp); 5337 udelay(50); 5338 } 5339 5340 if (grbm_soft_reset) { 5341 tmp = RREG32(mmGRBM_SOFT_RESET); 5342 tmp |= grbm_soft_reset; 5343 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); 5344 WREG32(mmGRBM_SOFT_RESET, tmp); 5345 tmp = RREG32(mmGRBM_SOFT_RESET); 5346 5347 udelay(50); 5348 5349 tmp &= ~grbm_soft_reset; 5350 WREG32(mmGRBM_SOFT_RESET, tmp); 5351 tmp = RREG32(mmGRBM_SOFT_RESET); 5352 } 5353 5354 if (srbm_soft_reset) { 5355 tmp = RREG32(mmSRBM_SOFT_RESET); 5356 tmp |= srbm_soft_reset; 5357 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); 5358 WREG32(mmSRBM_SOFT_RESET, tmp); 5359 tmp = RREG32(mmSRBM_SOFT_RESET); 5360 5361 udelay(50); 5362 5363 tmp &= ~srbm_soft_reset; 5364 WREG32(mmSRBM_SOFT_RESET, tmp); 5365 tmp = RREG32(mmSRBM_SOFT_RESET); 5366 } 5367 5368 if (grbm_soft_reset || srbm_soft_reset) { 5369 tmp = RREG32(mmGMCON_DEBUG); 5370 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0); 5371 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0); 5372 WREG32(mmGMCON_DEBUG, tmp); 5373 } 5374 5375 /* Wait a little for things to settle down */ 5376 udelay(50); 5377 5378 return 0; 5379 } 5380 5381 static int gfx_v8_0_post_soft_reset(void *handle) 5382 { 5383 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5384 u32 grbm_soft_reset = 0, srbm_soft_reset = 0; 5385 5386 if ((!adev->gfx.grbm_soft_reset) && 5387 (!adev->gfx.srbm_soft_reset)) 5388 return 0; 5389 5390 grbm_soft_reset = adev->gfx.grbm_soft_reset; 5391 srbm_soft_reset = adev->gfx.srbm_soft_reset; 5392 5393 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5394 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) 5395 gfx_v8_0_cp_gfx_resume(adev); 5396 5397 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || 5398 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || 5399 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) || 5400 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) { 5401 int i; 5402 5403 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 5404 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; 5405 5406 mutex_lock(&adev->srbm_mutex); 5407 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 5408 gfx_v8_0_deactivate_hqd(adev, 2); 5409 vi_srbm_select(adev, 0, 0, 0, 0); 5410 mutex_unlock(&adev->srbm_mutex); 5411 } 5412 gfx_v8_0_kiq_resume(adev); 5413 } 5414 gfx_v8_0_rlc_start(adev); 5415 5416 return 0; 5417 } 5418 5419 /** 5420 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot 5421 * 5422 * @adev: amdgpu_device pointer 5423 * 5424 * Fetches a GPU clock counter snapshot. 5425 * Returns the 64 bit clock counter snapshot. 5426 */ 5427 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev) 5428 { 5429 uint64_t clock; 5430 5431 mutex_lock(&adev->gfx.gpu_clock_mutex); 5432 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); 5433 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) | 5434 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); 5435 mutex_unlock(&adev->gfx.gpu_clock_mutex); 5436 return clock; 5437 } 5438 5439 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, 5440 uint32_t vmid, 5441 uint32_t gds_base, uint32_t gds_size, 5442 uint32_t gws_base, uint32_t gws_size, 5443 uint32_t oa_base, uint32_t oa_size) 5444 { 5445 gds_base = gds_base >> AMDGPU_GDS_SHIFT; 5446 gds_size = gds_size >> AMDGPU_GDS_SHIFT; 5447 5448 gws_base = gws_base >> AMDGPU_GWS_SHIFT; 5449 gws_size = gws_size >> AMDGPU_GWS_SHIFT; 5450 5451 oa_base = oa_base >> AMDGPU_OA_SHIFT; 5452 oa_size = oa_size >> AMDGPU_OA_SHIFT; 5453 5454 /* GDS Base */ 5455 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5456 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5457 WRITE_DATA_DST_SEL(0))); 5458 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base); 5459 amdgpu_ring_write(ring, 0); 5460 amdgpu_ring_write(ring, gds_base); 5461 5462 /* GDS Size */ 5463 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5464 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5465 WRITE_DATA_DST_SEL(0))); 5466 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size); 5467 amdgpu_ring_write(ring, 0); 5468 amdgpu_ring_write(ring, gds_size); 5469 5470 /* GWS */ 5471 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5472 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5473 WRITE_DATA_DST_SEL(0))); 5474 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws); 5475 amdgpu_ring_write(ring, 0); 5476 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base); 5477 5478 /* OA */ 5479 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 5480 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 5481 WRITE_DATA_DST_SEL(0))); 5482 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa); 5483 amdgpu_ring_write(ring, 0); 5484 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); 5485 } 5486 5487 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) 5488 { 5489 WREG32(mmSQ_IND_INDEX, 5490 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5491 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5492 (address << SQ_IND_INDEX__INDEX__SHIFT) | 5493 (SQ_IND_INDEX__FORCE_READ_MASK)); 5494 return RREG32(mmSQ_IND_DATA); 5495 } 5496 5497 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, 5498 uint32_t wave, uint32_t thread, 5499 uint32_t regno, uint32_t num, uint32_t *out) 5500 { 5501 WREG32(mmSQ_IND_INDEX, 5502 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) | 5503 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) | 5504 (regno << SQ_IND_INDEX__INDEX__SHIFT) | 5505 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) | 5506 (SQ_IND_INDEX__FORCE_READ_MASK) | 5507 (SQ_IND_INDEX__AUTO_INCR_MASK)); 5508 while (num--) 5509 *(out++) = RREG32(mmSQ_IND_DATA); 5510 } 5511 5512 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) 5513 { 5514 /* type 0 wave data */ 5515 dst[(*no_fields)++] = 0; 5516 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS); 5517 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO); 5518 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI); 5519 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO); 5520 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI); 5521 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID); 5522 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0); 5523 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1); 5524 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC); 5525 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC); 5526 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS); 5527 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS); 5528 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO); 5529 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI); 5530 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO); 5531 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI); 5532 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0); 5533 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0); 5534 } 5535 5536 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, 5537 uint32_t wave, uint32_t start, 5538 uint32_t size, uint32_t *dst) 5539 { 5540 wave_read_regs( 5541 adev, simd, wave, 0, 5542 start + SQIND_WAVE_SGPRS_OFFSET, size, dst); 5543 } 5544 5545 5546 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = { 5547 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter, 5548 .select_se_sh = &gfx_v8_0_select_se_sh, 5549 .read_wave_data = &gfx_v8_0_read_wave_data, 5550 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs, 5551 .select_me_pipe_q = &gfx_v8_0_select_me_pipe_q 5552 }; 5553 5554 static int gfx_v8_0_early_init(void *handle) 5555 { 5556 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5557 5558 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; 5559 adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; 5560 adev->gfx.funcs = &gfx_v8_0_gfx_funcs; 5561 gfx_v8_0_set_ring_funcs(adev); 5562 gfx_v8_0_set_irq_funcs(adev); 5563 gfx_v8_0_set_gds_init(adev); 5564 gfx_v8_0_set_rlc_funcs(adev); 5565 5566 return 0; 5567 } 5568 5569 static int gfx_v8_0_late_init(void *handle) 5570 { 5571 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5572 int r; 5573 5574 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0); 5575 if (r) 5576 return r; 5577 5578 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0); 5579 if (r) 5580 return r; 5581 5582 /* requires IBs so do in late init after IB pool is initialized */ 5583 r = gfx_v8_0_do_edc_gpr_workarounds(adev); 5584 if (r) 5585 return r; 5586 5587 r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); 5588 if (r) { 5589 DRM_ERROR("amdgpu_irq_get() failed to get IRQ for EDC, r: %d.\n", r); 5590 return r; 5591 } 5592 5593 r = amdgpu_irq_get(adev, &adev->gfx.sq_irq, 0); 5594 if (r) { 5595 DRM_ERROR( 5596 "amdgpu_irq_get() failed to get IRQ for SQ, r: %d.\n", 5597 r); 5598 return r; 5599 } 5600 5601 return 0; 5602 } 5603 5604 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, 5605 bool enable) 5606 { 5607 if (((adev->asic_type == CHIP_POLARIS11) || 5608 (adev->asic_type == CHIP_POLARIS12) || 5609 (adev->asic_type == CHIP_VEGAM)) && 5610 adev->powerplay.pp_funcs->set_powergating_by_smu) 5611 /* Send msg to SMU via Powerplay */ 5612 amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); 5613 5614 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); 5615 } 5616 5617 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, 5618 bool enable) 5619 { 5620 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0); 5621 } 5622 5623 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev, 5624 bool enable) 5625 { 5626 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0); 5627 } 5628 5629 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev, 5630 bool enable) 5631 { 5632 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0); 5633 } 5634 5635 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev, 5636 bool enable) 5637 { 5638 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0); 5639 5640 /* Read any GFX register to wake up GFX. */ 5641 if (!enable) 5642 RREG32(mmDB_RENDER_CONTROL); 5643 } 5644 5645 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, 5646 bool enable) 5647 { 5648 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { 5649 cz_enable_gfx_cg_power_gating(adev, true); 5650 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) 5651 cz_enable_gfx_pipeline_power_gating(adev, true); 5652 } else { 5653 cz_enable_gfx_cg_power_gating(adev, false); 5654 cz_enable_gfx_pipeline_power_gating(adev, false); 5655 } 5656 } 5657 5658 static int gfx_v8_0_set_powergating_state(void *handle, 5659 enum amd_powergating_state state) 5660 { 5661 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5662 bool enable = (state == AMD_PG_STATE_GATE); 5663 5664 if (amdgpu_sriov_vf(adev)) 5665 return 0; 5666 5667 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5668 AMD_PG_SUPPORT_RLC_SMU_HS | 5669 AMD_PG_SUPPORT_CP | 5670 AMD_PG_SUPPORT_GFX_DMG)) 5671 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5672 switch (adev->asic_type) { 5673 case CHIP_CARRIZO: 5674 case CHIP_STONEY: 5675 5676 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { 5677 cz_enable_sck_slow_down_on_power_up(adev, true); 5678 cz_enable_sck_slow_down_on_power_down(adev, true); 5679 } else { 5680 cz_enable_sck_slow_down_on_power_up(adev, false); 5681 cz_enable_sck_slow_down_on_power_down(adev, false); 5682 } 5683 if (adev->pg_flags & AMD_PG_SUPPORT_CP) 5684 cz_enable_cp_power_gating(adev, true); 5685 else 5686 cz_enable_cp_power_gating(adev, false); 5687 5688 cz_update_gfx_cg_power_gating(adev, enable); 5689 5690 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5691 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5692 else 5693 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5694 5695 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5696 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5697 else 5698 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5699 break; 5700 case CHIP_POLARIS11: 5701 case CHIP_POLARIS12: 5702 case CHIP_VEGAM: 5703 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) 5704 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true); 5705 else 5706 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false); 5707 5708 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) 5709 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true); 5710 else 5711 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false); 5712 5713 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable) 5714 polaris11_enable_gfx_quick_mg_power_gating(adev, true); 5715 else 5716 polaris11_enable_gfx_quick_mg_power_gating(adev, false); 5717 break; 5718 default: 5719 break; 5720 } 5721 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_SMG | 5722 AMD_PG_SUPPORT_RLC_SMU_HS | 5723 AMD_PG_SUPPORT_CP | 5724 AMD_PG_SUPPORT_GFX_DMG)) 5725 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5726 return 0; 5727 } 5728 5729 static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) 5730 { 5731 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 5732 int data; 5733 5734 if (amdgpu_sriov_vf(adev)) 5735 *flags = 0; 5736 5737 /* AMD_CG_SUPPORT_GFX_MGCG */ 5738 data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5739 if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) 5740 *flags |= AMD_CG_SUPPORT_GFX_MGCG; 5741 5742 /* AMD_CG_SUPPORT_GFX_CGLG */ 5743 data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5744 if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) 5745 *flags |= AMD_CG_SUPPORT_GFX_CGCG; 5746 5747 /* AMD_CG_SUPPORT_GFX_CGLS */ 5748 if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) 5749 *flags |= AMD_CG_SUPPORT_GFX_CGLS; 5750 5751 /* AMD_CG_SUPPORT_GFX_CGTS */ 5752 data = RREG32(mmCGTS_SM_CTRL_REG); 5753 if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) 5754 *flags |= AMD_CG_SUPPORT_GFX_CGTS; 5755 5756 /* AMD_CG_SUPPORT_GFX_CGTS_LS */ 5757 if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) 5758 *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; 5759 5760 /* AMD_CG_SUPPORT_GFX_RLC_LS */ 5761 data = RREG32(mmRLC_MEM_SLP_CNTL); 5762 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) 5763 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; 5764 5765 /* AMD_CG_SUPPORT_GFX_CP_LS */ 5766 data = RREG32(mmCP_MEM_SLP_CNTL); 5767 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) 5768 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; 5769 } 5770 5771 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, 5772 uint32_t reg_addr, uint32_t cmd) 5773 { 5774 uint32_t data; 5775 5776 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 5777 5778 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); 5779 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); 5780 5781 data = RREG32(mmRLC_SERDES_WR_CTRL); 5782 if (adev->asic_type == CHIP_STONEY) 5783 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5784 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5785 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5786 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5787 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5788 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5789 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5790 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5791 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5792 else 5793 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK | 5794 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK | 5795 RLC_SERDES_WR_CTRL__P1_SELECT_MASK | 5796 RLC_SERDES_WR_CTRL__P2_SELECT_MASK | 5797 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK | 5798 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK | 5799 RLC_SERDES_WR_CTRL__POWER_UP_MASK | 5800 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK | 5801 RLC_SERDES_WR_CTRL__BPM_DATA_MASK | 5802 RLC_SERDES_WR_CTRL__REG_ADDR_MASK | 5803 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK); 5804 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK | 5805 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) | 5806 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) | 5807 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT)); 5808 5809 WREG32(mmRLC_SERDES_WR_CTRL, data); 5810 } 5811 5812 #define MSG_ENTER_RLC_SAFE_MODE 1 5813 #define MSG_EXIT_RLC_SAFE_MODE 0 5814 #define RLC_GPR_REG2__REQ_MASK 0x00000001 5815 #define RLC_GPR_REG2__REQ__SHIFT 0 5816 #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 5817 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e 5818 5819 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) 5820 { 5821 u32 data; 5822 unsigned i; 5823 5824 data = RREG32(mmRLC_CNTL); 5825 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5826 return; 5827 5828 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5829 data |= RLC_SAFE_MODE__CMD_MASK; 5830 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5831 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); 5832 WREG32(mmRLC_SAFE_MODE, data); 5833 5834 for (i = 0; i < adev->usec_timeout; i++) { 5835 if ((RREG32(mmRLC_GPM_STAT) & 5836 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5837 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == 5838 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | 5839 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) 5840 break; 5841 udelay(1); 5842 } 5843 5844 for (i = 0; i < adev->usec_timeout; i++) { 5845 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5846 break; 5847 udelay(1); 5848 } 5849 adev->gfx.rlc.in_safe_mode = true; 5850 } 5851 } 5852 5853 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) 5854 { 5855 u32 data = 0; 5856 unsigned i; 5857 5858 data = RREG32(mmRLC_CNTL); 5859 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) 5860 return; 5861 5862 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { 5863 if (adev->gfx.rlc.in_safe_mode) { 5864 data |= RLC_SAFE_MODE__CMD_MASK; 5865 data &= ~RLC_SAFE_MODE__MESSAGE_MASK; 5866 WREG32(mmRLC_SAFE_MODE, data); 5867 adev->gfx.rlc.in_safe_mode = false; 5868 } 5869 } 5870 5871 for (i = 0; i < adev->usec_timeout; i++) { 5872 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) 5873 break; 5874 udelay(1); 5875 } 5876 } 5877 5878 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { 5879 .enter_safe_mode = iceland_enter_rlc_safe_mode, 5880 .exit_safe_mode = iceland_exit_rlc_safe_mode 5881 }; 5882 5883 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, 5884 bool enable) 5885 { 5886 uint32_t temp, data; 5887 5888 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5889 5890 /* It is disabled by HW by default */ 5891 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { 5892 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 5893 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) 5894 /* 1 - RLC memory Light sleep */ 5895 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1); 5896 5897 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) 5898 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1); 5899 } 5900 5901 /* 3 - RLC_CGTT_MGCG_OVERRIDE */ 5902 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5903 if (adev->flags & AMD_IS_APU) 5904 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5905 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5906 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK); 5907 else 5908 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5909 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5910 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5911 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5912 5913 if (temp != data) 5914 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5915 5916 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5917 gfx_v8_0_wait_for_rlc_serdes(adev); 5918 5919 /* 5 - clear mgcg override */ 5920 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 5921 5922 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) { 5923 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */ 5924 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5925 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK); 5926 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT); 5927 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK; 5928 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK; 5929 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) && 5930 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS)) 5931 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK; 5932 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK; 5933 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT); 5934 if (temp != data) 5935 WREG32(mmCGTS_SM_CTRL_REG, data); 5936 } 5937 udelay(50); 5938 5939 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5940 gfx_v8_0_wait_for_rlc_serdes(adev); 5941 } else { 5942 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */ 5943 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5944 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK | 5945 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK | 5946 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK | 5947 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK); 5948 if (temp != data) 5949 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data); 5950 5951 /* 2 - disable MGLS in RLC */ 5952 data = RREG32(mmRLC_MEM_SLP_CNTL); 5953 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) { 5954 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK; 5955 WREG32(mmRLC_MEM_SLP_CNTL, data); 5956 } 5957 5958 /* 3 - disable MGLS in CP */ 5959 data = RREG32(mmCP_MEM_SLP_CNTL); 5960 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) { 5961 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK; 5962 WREG32(mmCP_MEM_SLP_CNTL, data); 5963 } 5964 5965 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */ 5966 temp = data = RREG32(mmCGTS_SM_CTRL_REG); 5967 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK | 5968 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK); 5969 if (temp != data) 5970 WREG32(mmCGTS_SM_CTRL_REG, data); 5971 5972 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5973 gfx_v8_0_wait_for_rlc_serdes(adev); 5974 5975 /* 6 - set mgcg override */ 5976 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD); 5977 5978 udelay(50); 5979 5980 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 5981 gfx_v8_0_wait_for_rlc_serdes(adev); 5982 } 5983 5984 adev->gfx.rlc.funcs->exit_safe_mode(adev); 5985 } 5986 5987 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, 5988 bool enable) 5989 { 5990 uint32_t temp, temp1, data, data1; 5991 5992 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); 5993 5994 adev->gfx.rlc.funcs->enter_safe_mode(adev); 5995 5996 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { 5997 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 5998 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; 5999 if (temp1 != data1) 6000 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6001 6002 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6003 gfx_v8_0_wait_for_rlc_serdes(adev); 6004 6005 /* 2 - clear cgcg override */ 6006 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD); 6007 6008 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6009 gfx_v8_0_wait_for_rlc_serdes(adev); 6010 6011 /* 3 - write cmd to set CGLS */ 6012 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD); 6013 6014 /* 4 - enable cgcg */ 6015 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; 6016 6017 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6018 /* enable cgls*/ 6019 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6020 6021 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6022 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK; 6023 6024 if (temp1 != data1) 6025 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6026 } else { 6027 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; 6028 } 6029 6030 if (temp != data) 6031 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6032 6033 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/ 6034 * Cmp_busy/GFX_Idle interrupts 6035 */ 6036 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6037 } else { 6038 /* disable cntx_empty_int_enable & GFX Idle interrupt */ 6039 gfx_v8_0_enable_gui_idle_interrupt(adev, false); 6040 6041 /* TEST CGCG */ 6042 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); 6043 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK | 6044 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK); 6045 if (temp1 != data1) 6046 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1); 6047 6048 /* read gfx register to wake up cgcg */ 6049 RREG32(mmCB_CGTT_SCLK_CTRL); 6050 RREG32(mmCB_CGTT_SCLK_CTRL); 6051 RREG32(mmCB_CGTT_SCLK_CTRL); 6052 RREG32(mmCB_CGTT_SCLK_CTRL); 6053 6054 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6055 gfx_v8_0_wait_for_rlc_serdes(adev); 6056 6057 /* write cmd to Set CGCG Overrride */ 6058 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD); 6059 6060 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ 6061 gfx_v8_0_wait_for_rlc_serdes(adev); 6062 6063 /* write cmd to Clear CGLS */ 6064 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD); 6065 6066 /* disable cgcg, cgls should be disabled too. */ 6067 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | 6068 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); 6069 if (temp != data) 6070 WREG32(mmRLC_CGCG_CGLS_CTRL, data); 6071 /* enable interrupts again for PG */ 6072 gfx_v8_0_enable_gui_idle_interrupt(adev, true); 6073 } 6074 6075 gfx_v8_0_wait_for_rlc_serdes(adev); 6076 6077 adev->gfx.rlc.funcs->exit_safe_mode(adev); 6078 } 6079 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, 6080 bool enable) 6081 { 6082 if (enable) { 6083 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) 6084 * === MGCG + MGLS + TS(CG/LS) === 6085 */ 6086 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6087 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6088 } else { 6089 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS) 6090 * === CGCG + CGLS === 6091 */ 6092 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); 6093 gfx_v8_0_update_medium_grain_clock_gating(adev, enable); 6094 } 6095 return 0; 6096 } 6097 6098 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev, 6099 enum amd_clockgating_state state) 6100 { 6101 uint32_t msg_id, pp_state = 0; 6102 uint32_t pp_support_state = 0; 6103 6104 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6105 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6106 pp_support_state = PP_STATE_SUPPORT_LS; 6107 pp_state = PP_STATE_LS; 6108 } 6109 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6110 pp_support_state |= PP_STATE_SUPPORT_CG; 6111 pp_state |= PP_STATE_CG; 6112 } 6113 if (state == AMD_CG_STATE_UNGATE) 6114 pp_state = 0; 6115 6116 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6117 PP_BLOCK_GFX_CG, 6118 pp_support_state, 6119 pp_state); 6120 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6121 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6122 } 6123 6124 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6125 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6126 pp_support_state = PP_STATE_SUPPORT_LS; 6127 pp_state = PP_STATE_LS; 6128 } 6129 6130 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6131 pp_support_state |= PP_STATE_SUPPORT_CG; 6132 pp_state |= PP_STATE_CG; 6133 } 6134 6135 if (state == AMD_CG_STATE_UNGATE) 6136 pp_state = 0; 6137 6138 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6139 PP_BLOCK_GFX_MG, 6140 pp_support_state, 6141 pp_state); 6142 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6143 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6144 } 6145 6146 return 0; 6147 } 6148 6149 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, 6150 enum amd_clockgating_state state) 6151 { 6152 6153 uint32_t msg_id, pp_state = 0; 6154 uint32_t pp_support_state = 0; 6155 6156 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) { 6157 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) { 6158 pp_support_state = PP_STATE_SUPPORT_LS; 6159 pp_state = PP_STATE_LS; 6160 } 6161 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) { 6162 pp_support_state |= PP_STATE_SUPPORT_CG; 6163 pp_state |= PP_STATE_CG; 6164 } 6165 if (state == AMD_CG_STATE_UNGATE) 6166 pp_state = 0; 6167 6168 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6169 PP_BLOCK_GFX_CG, 6170 pp_support_state, 6171 pp_state); 6172 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6173 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6174 } 6175 6176 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) { 6177 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) { 6178 pp_support_state = PP_STATE_SUPPORT_LS; 6179 pp_state = PP_STATE_LS; 6180 } 6181 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) { 6182 pp_support_state |= PP_STATE_SUPPORT_CG; 6183 pp_state |= PP_STATE_CG; 6184 } 6185 if (state == AMD_CG_STATE_UNGATE) 6186 pp_state = 0; 6187 6188 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6189 PP_BLOCK_GFX_3D, 6190 pp_support_state, 6191 pp_state); 6192 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6193 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6194 } 6195 6196 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) { 6197 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { 6198 pp_support_state = PP_STATE_SUPPORT_LS; 6199 pp_state = PP_STATE_LS; 6200 } 6201 6202 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) { 6203 pp_support_state |= PP_STATE_SUPPORT_CG; 6204 pp_state |= PP_STATE_CG; 6205 } 6206 6207 if (state == AMD_CG_STATE_UNGATE) 6208 pp_state = 0; 6209 6210 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6211 PP_BLOCK_GFX_MG, 6212 pp_support_state, 6213 pp_state); 6214 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6215 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6216 } 6217 6218 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) { 6219 pp_support_state = PP_STATE_SUPPORT_LS; 6220 6221 if (state == AMD_CG_STATE_UNGATE) 6222 pp_state = 0; 6223 else 6224 pp_state = PP_STATE_LS; 6225 6226 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6227 PP_BLOCK_GFX_RLC, 6228 pp_support_state, 6229 pp_state); 6230 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6231 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6232 } 6233 6234 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) { 6235 pp_support_state = PP_STATE_SUPPORT_LS; 6236 6237 if (state == AMD_CG_STATE_UNGATE) 6238 pp_state = 0; 6239 else 6240 pp_state = PP_STATE_LS; 6241 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX, 6242 PP_BLOCK_GFX_CP, 6243 pp_support_state, 6244 pp_state); 6245 if (adev->powerplay.pp_funcs->set_clockgating_by_smu) 6246 amdgpu_dpm_set_clockgating_by_smu(adev, msg_id); 6247 } 6248 6249 return 0; 6250 } 6251 6252 static int gfx_v8_0_set_clockgating_state(void *handle, 6253 enum amd_clockgating_state state) 6254 { 6255 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 6256 6257 if (amdgpu_sriov_vf(adev)) 6258 return 0; 6259 6260 switch (adev->asic_type) { 6261 case CHIP_FIJI: 6262 case CHIP_CARRIZO: 6263 case CHIP_STONEY: 6264 gfx_v8_0_update_gfx_clock_gating(adev, 6265 state == AMD_CG_STATE_GATE); 6266 break; 6267 case CHIP_TONGA: 6268 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state); 6269 break; 6270 case CHIP_POLARIS10: 6271 case CHIP_POLARIS11: 6272 case CHIP_POLARIS12: 6273 case CHIP_VEGAM: 6274 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state); 6275 break; 6276 default: 6277 break; 6278 } 6279 return 0; 6280 } 6281 6282 static u64 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring) 6283 { 6284 return ring->adev->wb.wb[ring->rptr_offs]; 6285 } 6286 6287 static u64 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) 6288 { 6289 struct amdgpu_device *adev = ring->adev; 6290 6291 if (ring->use_doorbell) 6292 /* XXX check if swapping is necessary on BE */ 6293 return ring->adev->wb.wb[ring->wptr_offs]; 6294 else 6295 return RREG32(mmCP_RB0_WPTR); 6296 } 6297 6298 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) 6299 { 6300 struct amdgpu_device *adev = ring->adev; 6301 6302 if (ring->use_doorbell) { 6303 /* XXX check if swapping is necessary on BE */ 6304 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6305 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6306 } else { 6307 WREG32(mmCP_RB0_WPTR, lower_32_bits(ring->wptr)); 6308 (void)RREG32(mmCP_RB0_WPTR); 6309 } 6310 } 6311 6312 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) 6313 { 6314 u32 ref_and_mask, reg_mem_engine; 6315 6316 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || 6317 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { 6318 switch (ring->me) { 6319 case 1: 6320 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6321 break; 6322 case 2: 6323 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe; 6324 break; 6325 default: 6326 return; 6327 } 6328 reg_mem_engine = 0; 6329 } else { 6330 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK; 6331 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */ 6332 } 6333 6334 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6335 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */ 6336 WAIT_REG_MEM_FUNCTION(3) | /* == */ 6337 reg_mem_engine)); 6338 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ); 6339 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE); 6340 amdgpu_ring_write(ring, ref_and_mask); 6341 amdgpu_ring_write(ring, ref_and_mask); 6342 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6343 } 6344 6345 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) 6346 { 6347 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6348 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) | 6349 EVENT_INDEX(4)); 6350 6351 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0)); 6352 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) | 6353 EVENT_INDEX(0)); 6354 } 6355 6356 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, 6357 struct amdgpu_ib *ib, 6358 unsigned vmid, bool ctx_switch) 6359 { 6360 u32 header, control = 0; 6361 6362 if (ib->flags & AMDGPU_IB_FLAG_CE) 6363 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); 6364 else 6365 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); 6366 6367 control |= ib->length_dw | (vmid << 24); 6368 6369 if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { 6370 control |= INDIRECT_BUFFER_PRE_ENB(1); 6371 6372 if (!(ib->flags & AMDGPU_IB_FLAG_CE)) 6373 gfx_v8_0_ring_emit_de_meta(ring); 6374 } 6375 6376 amdgpu_ring_write(ring, header); 6377 amdgpu_ring_write(ring, 6378 #ifdef __BIG_ENDIAN 6379 (2 << 0) | 6380 #endif 6381 (ib->gpu_addr & 0xFFFFFFFC)); 6382 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6383 amdgpu_ring_write(ring, control); 6384 } 6385 6386 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, 6387 struct amdgpu_ib *ib, 6388 unsigned vmid, bool ctx_switch) 6389 { 6390 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); 6391 6392 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); 6393 amdgpu_ring_write(ring, 6394 #ifdef __BIG_ENDIAN 6395 (2 << 0) | 6396 #endif 6397 (ib->gpu_addr & 0xFFFFFFFC)); 6398 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF); 6399 amdgpu_ring_write(ring, control); 6400 } 6401 6402 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, 6403 u64 seq, unsigned flags) 6404 { 6405 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6406 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6407 6408 /* EVENT_WRITE_EOP - flush caches, send int */ 6409 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4)); 6410 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6411 EOP_TC_ACTION_EN | 6412 EOP_TC_WB_ACTION_EN | 6413 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6414 EVENT_INDEX(5))); 6415 amdgpu_ring_write(ring, addr & 0xfffffffc); 6416 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | 6417 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6418 amdgpu_ring_write(ring, lower_32_bits(seq)); 6419 amdgpu_ring_write(ring, upper_32_bits(seq)); 6420 6421 } 6422 6423 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) 6424 { 6425 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6426 uint32_t seq = ring->fence_drv.sync_seq; 6427 uint64_t addr = ring->fence_drv.gpu_addr; 6428 6429 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6430 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */ 6431 WAIT_REG_MEM_FUNCTION(3) | /* equal */ 6432 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */ 6433 amdgpu_ring_write(ring, addr & 0xfffffffc); 6434 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); 6435 amdgpu_ring_write(ring, seq); 6436 amdgpu_ring_write(ring, 0xffffffff); 6437 amdgpu_ring_write(ring, 4); /* poll interval */ 6438 } 6439 6440 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, 6441 unsigned vmid, uint64_t pd_addr) 6442 { 6443 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); 6444 6445 amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); 6446 6447 /* wait for the invalidate to complete */ 6448 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); 6449 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ 6450 WAIT_REG_MEM_FUNCTION(0) | /* always */ 6451 WAIT_REG_MEM_ENGINE(0))); /* me */ 6452 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST); 6453 amdgpu_ring_write(ring, 0); 6454 amdgpu_ring_write(ring, 0); /* ref */ 6455 amdgpu_ring_write(ring, 0); /* mask */ 6456 amdgpu_ring_write(ring, 0x20); /* poll interval */ 6457 6458 /* compute doesn't have PFP */ 6459 if (usepfp) { 6460 /* sync PFP to ME, otherwise we might get invalid PFP reads */ 6461 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); 6462 amdgpu_ring_write(ring, 0x0); 6463 } 6464 } 6465 6466 static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring) 6467 { 6468 return ring->adev->wb.wb[ring->wptr_offs]; 6469 } 6470 6471 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) 6472 { 6473 struct amdgpu_device *adev = ring->adev; 6474 6475 /* XXX check if swapping is necessary on BE */ 6476 adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); 6477 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); 6478 } 6479 6480 static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, 6481 bool acquire) 6482 { 6483 struct amdgpu_device *adev = ring->adev; 6484 int pipe_num, tmp, reg; 6485 int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; 6486 6487 pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; 6488 6489 /* first me only has 2 entries, GFX and HP3D */ 6490 if (ring->me > 0) 6491 pipe_num -= 2; 6492 6493 reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; 6494 tmp = RREG32(reg); 6495 tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); 6496 WREG32(reg, tmp); 6497 } 6498 6499 static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, 6500 struct amdgpu_ring *ring, 6501 bool acquire) 6502 { 6503 int i, pipe; 6504 bool reserve; 6505 struct amdgpu_ring *iring; 6506 6507 mutex_lock(&adev->gfx.pipe_reserve_mutex); 6508 pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); 6509 if (acquire) 6510 set_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6511 else 6512 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6513 6514 if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { 6515 /* Clear all reservations - everyone reacquires all resources */ 6516 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) 6517 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], 6518 true); 6519 6520 for (i = 0; i < adev->gfx.num_compute_rings; ++i) 6521 gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], 6522 true); 6523 } else { 6524 /* Lower all pipes without a current reservation */ 6525 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { 6526 iring = &adev->gfx.gfx_ring[i]; 6527 pipe = amdgpu_gfx_queue_to_bit(adev, 6528 iring->me, 6529 iring->pipe, 6530 0); 6531 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6532 gfx_v8_0_ring_set_pipe_percent(iring, reserve); 6533 } 6534 6535 for (i = 0; i < adev->gfx.num_compute_rings; ++i) { 6536 iring = &adev->gfx.compute_ring[i]; 6537 pipe = amdgpu_gfx_queue_to_bit(adev, 6538 iring->me, 6539 iring->pipe, 6540 0); 6541 reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); 6542 gfx_v8_0_ring_set_pipe_percent(iring, reserve); 6543 } 6544 } 6545 6546 mutex_unlock(&adev->gfx.pipe_reserve_mutex); 6547 } 6548 6549 static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, 6550 struct amdgpu_ring *ring, 6551 bool acquire) 6552 { 6553 uint32_t pipe_priority = acquire ? 0x2 : 0x0; 6554 uint32_t queue_priority = acquire ? 0xf : 0x0; 6555 6556 mutex_lock(&adev->srbm_mutex); 6557 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 6558 6559 WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority); 6560 WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority); 6561 6562 vi_srbm_select(adev, 0, 0, 0, 0); 6563 mutex_unlock(&adev->srbm_mutex); 6564 } 6565 static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, 6566 enum drm_sched_priority priority) 6567 { 6568 struct amdgpu_device *adev = ring->adev; 6569 bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW; 6570 6571 if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) 6572 return; 6573 6574 gfx_v8_0_hqd_set_priority(adev, ring, acquire); 6575 gfx_v8_0_pipe_reserve_resources(adev, ring, acquire); 6576 } 6577 6578 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, 6579 u64 addr, u64 seq, 6580 unsigned flags) 6581 { 6582 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; 6583 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; 6584 6585 /* RELEASE_MEM - flush caches, send int */ 6586 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5)); 6587 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN | 6588 EOP_TC_ACTION_EN | 6589 EOP_TC_WB_ACTION_EN | 6590 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | 6591 EVENT_INDEX(5))); 6592 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0)); 6593 amdgpu_ring_write(ring, addr & 0xfffffffc); 6594 amdgpu_ring_write(ring, upper_32_bits(addr)); 6595 amdgpu_ring_write(ring, lower_32_bits(seq)); 6596 amdgpu_ring_write(ring, upper_32_bits(seq)); 6597 } 6598 6599 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, 6600 u64 seq, unsigned int flags) 6601 { 6602 /* we only allocate 32bit for each seq wb address */ 6603 BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); 6604 6605 /* write fence seq to the "addr" */ 6606 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6607 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6608 WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); 6609 amdgpu_ring_write(ring, lower_32_bits(addr)); 6610 amdgpu_ring_write(ring, upper_32_bits(addr)); 6611 amdgpu_ring_write(ring, lower_32_bits(seq)); 6612 6613 if (flags & AMDGPU_FENCE_FLAG_INT) { 6614 /* set register to trigger INT */ 6615 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6616 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | 6617 WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); 6618 amdgpu_ring_write(ring, mmCPC_INT_STATUS); 6619 amdgpu_ring_write(ring, 0); 6620 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ 6621 } 6622 } 6623 6624 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) 6625 { 6626 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 6627 amdgpu_ring_write(ring, 0); 6628 } 6629 6630 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) 6631 { 6632 uint32_t dw2 = 0; 6633 6634 if (amdgpu_sriov_vf(ring->adev)) 6635 gfx_v8_0_ring_emit_ce_meta(ring); 6636 6637 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ 6638 if (flags & AMDGPU_HAVE_CTX_SWITCH) { 6639 gfx_v8_0_ring_emit_vgt_flush(ring); 6640 /* set load_global_config & load_global_uconfig */ 6641 dw2 |= 0x8001; 6642 /* set load_cs_sh_regs */ 6643 dw2 |= 0x01000000; 6644 /* set load_per_context_state & load_gfx_sh_regs for GFX */ 6645 dw2 |= 0x10002; 6646 6647 /* set load_ce_ram if preamble presented */ 6648 if (AMDGPU_PREAMBLE_IB_PRESENT & flags) 6649 dw2 |= 0x10000000; 6650 } else { 6651 /* still load_ce_ram if this is the first time preamble presented 6652 * although there is no context switch happens. 6653 */ 6654 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) 6655 dw2 |= 0x10000000; 6656 } 6657 6658 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); 6659 amdgpu_ring_write(ring, dw2); 6660 amdgpu_ring_write(ring, 0); 6661 } 6662 6663 static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) 6664 { 6665 unsigned ret; 6666 6667 amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); 6668 amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); 6669 amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); 6670 amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ 6671 ret = ring->wptr & ring->buf_mask; 6672 amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ 6673 return ret; 6674 } 6675 6676 static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) 6677 { 6678 unsigned cur; 6679 6680 BUG_ON(offset > ring->buf_mask); 6681 BUG_ON(ring->ring[offset] != 0x55aa55aa); 6682 6683 cur = (ring->wptr & ring->buf_mask) - 1; 6684 if (likely(cur > offset)) 6685 ring->ring[offset] = cur - offset; 6686 else 6687 ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; 6688 } 6689 6690 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) 6691 { 6692 struct amdgpu_device *adev = ring->adev; 6693 6694 amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); 6695 amdgpu_ring_write(ring, 0 | /* src: register*/ 6696 (5 << 8) | /* dst: memory */ 6697 (1 << 20)); /* write confirm */ 6698 amdgpu_ring_write(ring, reg); 6699 amdgpu_ring_write(ring, 0); 6700 amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + 6701 adev->virt.reg_val_offs * 4)); 6702 amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + 6703 adev->virt.reg_val_offs * 4)); 6704 } 6705 6706 static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, 6707 uint32_t val) 6708 { 6709 uint32_t cmd; 6710 6711 switch (ring->funcs->type) { 6712 case AMDGPU_RING_TYPE_GFX: 6713 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM; 6714 break; 6715 case AMDGPU_RING_TYPE_KIQ: 6716 cmd = 1 << 16; /* no inc addr */ 6717 break; 6718 default: 6719 cmd = WR_CONFIRM; 6720 break; 6721 } 6722 6723 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); 6724 amdgpu_ring_write(ring, cmd); 6725 amdgpu_ring_write(ring, reg); 6726 amdgpu_ring_write(ring, 0); 6727 amdgpu_ring_write(ring, val); 6728 } 6729 6730 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, 6731 enum amdgpu_interrupt_state state) 6732 { 6733 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE, 6734 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6735 } 6736 6737 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, 6738 int me, int pipe, 6739 enum amdgpu_interrupt_state state) 6740 { 6741 u32 mec_int_cntl, mec_int_cntl_reg; 6742 6743 /* 6744 * amdgpu controls only the first MEC. That's why this function only 6745 * handles the setting of interrupts for this specific MEC. All other 6746 * pipes' interrupts are set by amdkfd. 6747 */ 6748 6749 if (me == 1) { 6750 switch (pipe) { 6751 case 0: 6752 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; 6753 break; 6754 case 1: 6755 mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; 6756 break; 6757 case 2: 6758 mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; 6759 break; 6760 case 3: 6761 mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; 6762 break; 6763 default: 6764 DRM_DEBUG("invalid pipe %d\n", pipe); 6765 return; 6766 } 6767 } else { 6768 DRM_DEBUG("invalid me %d\n", me); 6769 return; 6770 } 6771 6772 switch (state) { 6773 case AMDGPU_IRQ_STATE_DISABLE: 6774 mec_int_cntl = RREG32(mec_int_cntl_reg); 6775 mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; 6776 WREG32(mec_int_cntl_reg, mec_int_cntl); 6777 break; 6778 case AMDGPU_IRQ_STATE_ENABLE: 6779 mec_int_cntl = RREG32(mec_int_cntl_reg); 6780 mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; 6781 WREG32(mec_int_cntl_reg, mec_int_cntl); 6782 break; 6783 default: 6784 break; 6785 } 6786 } 6787 6788 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, 6789 struct amdgpu_irq_src *source, 6790 unsigned type, 6791 enum amdgpu_interrupt_state state) 6792 { 6793 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, 6794 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6795 6796 return 0; 6797 } 6798 6799 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev, 6800 struct amdgpu_irq_src *source, 6801 unsigned type, 6802 enum amdgpu_interrupt_state state) 6803 { 6804 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, 6805 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 6806 6807 return 0; 6808 } 6809 6810 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev, 6811 struct amdgpu_irq_src *src, 6812 unsigned type, 6813 enum amdgpu_interrupt_state state) 6814 { 6815 switch (type) { 6816 case AMDGPU_CP_IRQ_GFX_EOP: 6817 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state); 6818 break; 6819 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: 6820 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state); 6821 break; 6822 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP: 6823 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state); 6824 break; 6825 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP: 6826 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state); 6827 break; 6828 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP: 6829 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state); 6830 break; 6831 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP: 6832 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state); 6833 break; 6834 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP: 6835 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state); 6836 break; 6837 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP: 6838 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state); 6839 break; 6840 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP: 6841 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state); 6842 break; 6843 default: 6844 break; 6845 } 6846 return 0; 6847 } 6848 6849 static int gfx_v8_0_set_cp_ecc_int_state(struct amdgpu_device *adev, 6850 struct amdgpu_irq_src *source, 6851 unsigned int type, 6852 enum amdgpu_interrupt_state state) 6853 { 6854 int enable_flag; 6855 6856 switch (state) { 6857 case AMDGPU_IRQ_STATE_DISABLE: 6858 enable_flag = 0; 6859 break; 6860 6861 case AMDGPU_IRQ_STATE_ENABLE: 6862 enable_flag = 1; 6863 break; 6864 6865 default: 6866 return -EINVAL; 6867 } 6868 6869 WREG32_FIELD(CP_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6870 WREG32_FIELD(CP_INT_CNTL_RING0, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6871 WREG32_FIELD(CP_INT_CNTL_RING1, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6872 WREG32_FIELD(CP_INT_CNTL_RING2, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6873 WREG32_FIELD(CPC_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, enable_flag); 6874 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6875 enable_flag); 6876 WREG32_FIELD(CP_ME1_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6877 enable_flag); 6878 WREG32_FIELD(CP_ME1_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6879 enable_flag); 6880 WREG32_FIELD(CP_ME1_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6881 enable_flag); 6882 WREG32_FIELD(CP_ME2_PIPE0_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6883 enable_flag); 6884 WREG32_FIELD(CP_ME2_PIPE1_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6885 enable_flag); 6886 WREG32_FIELD(CP_ME2_PIPE2_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6887 enable_flag); 6888 WREG32_FIELD(CP_ME2_PIPE3_INT_CNTL, CP_ECC_ERROR_INT_ENABLE, 6889 enable_flag); 6890 6891 return 0; 6892 } 6893 6894 static int gfx_v8_0_set_sq_int_state(struct amdgpu_device *adev, 6895 struct amdgpu_irq_src *source, 6896 unsigned int type, 6897 enum amdgpu_interrupt_state state) 6898 { 6899 int enable_flag; 6900 6901 switch (state) { 6902 case AMDGPU_IRQ_STATE_DISABLE: 6903 enable_flag = 1; 6904 break; 6905 6906 case AMDGPU_IRQ_STATE_ENABLE: 6907 enable_flag = 0; 6908 break; 6909 6910 default: 6911 return -EINVAL; 6912 } 6913 6914 WREG32_FIELD(SQ_INTERRUPT_MSG_CTRL, STALL, 6915 enable_flag); 6916 6917 return 0; 6918 } 6919 6920 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, 6921 struct amdgpu_irq_src *source, 6922 struct amdgpu_iv_entry *entry) 6923 { 6924 int i; 6925 u8 me_id, pipe_id, queue_id; 6926 struct amdgpu_ring *ring; 6927 6928 DRM_DEBUG("IH: CP EOP\n"); 6929 me_id = (entry->ring_id & 0x0c) >> 2; 6930 pipe_id = (entry->ring_id & 0x03) >> 0; 6931 queue_id = (entry->ring_id & 0x70) >> 4; 6932 6933 switch (me_id) { 6934 case 0: 6935 amdgpu_fence_process(&adev->gfx.gfx_ring[0]); 6936 break; 6937 case 1: 6938 case 2: 6939 for (i = 0; i < adev->gfx.num_compute_rings; i++) { 6940 ring = &adev->gfx.compute_ring[i]; 6941 /* Per-queue interrupt is supported for MEC starting from VI. 6942 * The interrupt can only be enabled/disabled per pipe instead of per queue. 6943 */ 6944 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) 6945 amdgpu_fence_process(ring); 6946 } 6947 break; 6948 } 6949 return 0; 6950 } 6951 6952 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, 6953 struct amdgpu_irq_src *source, 6954 struct amdgpu_iv_entry *entry) 6955 { 6956 DRM_ERROR("Illegal register access in command stream\n"); 6957 schedule_work(&adev->reset_work); 6958 return 0; 6959 } 6960 6961 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, 6962 struct amdgpu_irq_src *source, 6963 struct amdgpu_iv_entry *entry) 6964 { 6965 DRM_ERROR("Illegal instruction in command stream\n"); 6966 schedule_work(&adev->reset_work); 6967 return 0; 6968 } 6969 6970 static int gfx_v8_0_cp_ecc_error_irq(struct amdgpu_device *adev, 6971 struct amdgpu_irq_src *source, 6972 struct amdgpu_iv_entry *entry) 6973 { 6974 DRM_ERROR("CP EDC/ECC error detected."); 6975 return 0; 6976 } 6977 6978 static void gfx_v8_0_parse_sq_irq(struct amdgpu_device *adev, unsigned ih_data) 6979 { 6980 u32 enc, se_id, sh_id, cu_id; 6981 char type[20]; 6982 int sq_edc_source = -1; 6983 6984 enc = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, ENCODING); 6985 se_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_CMN, SE_ID); 6986 6987 switch (enc) { 6988 case 0: 6989 DRM_INFO("SQ general purpose intr detected:" 6990 "se_id %d, immed_overflow %d, host_reg_overflow %d," 6991 "host_cmd_overflow %d, cmd_timestamp %d," 6992 "reg_timestamp %d, thread_trace_buff_full %d," 6993 "wlt %d, thread_trace %d.\n", 6994 se_id, 6995 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, IMMED_OVERFLOW), 6996 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_REG_OVERFLOW), 6997 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, HOST_CMD_OVERFLOW), 6998 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, CMD_TIMESTAMP), 6999 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, REG_TIMESTAMP), 7000 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE_BUF_FULL), 7001 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, WLT), 7002 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_AUTO, THREAD_TRACE) 7003 ); 7004 break; 7005 case 1: 7006 case 2: 7007 7008 cu_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, CU_ID); 7009 sh_id = REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SH_ID); 7010 7011 /* 7012 * This function can be called either directly from ISR 7013 * or from BH in which case we can access SQ_EDC_INFO 7014 * instance 7015 */ 7016 if (in_task()) { 7017 mutex_lock(&adev->grbm_idx_mutex); 7018 gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id); 7019 7020 sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE); 7021 7022 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7023 mutex_unlock(&adev->grbm_idx_mutex); 7024 } 7025 7026 if (enc == 1) 7027 sprintf(type, "instruction intr"); 7028 else 7029 sprintf(type, "EDC/ECC error"); 7030 7031 DRM_INFO( 7032 "SQ %s detected: " 7033 "se_id %d, sh_id %d, cu_id %d, simd_id %d, wave_id %d, vm_id %d " 7034 "trap %s, sq_ed_info.source %s.\n", 7035 type, se_id, sh_id, cu_id, 7036 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, SIMD_ID), 7037 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, WAVE_ID), 7038 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, VM_ID), 7039 REG_GET_FIELD(ih_data, SQ_INTERRUPT_WORD_WAVE, PRIV) ? "true" : "false", 7040 (sq_edc_source != -1) ? sq_edc_source_names[sq_edc_source] : "unavailable" 7041 ); 7042 break; 7043 default: 7044 DRM_ERROR("SQ invalid encoding type\n."); 7045 } 7046 } 7047 7048 static void gfx_v8_0_sq_irq_work_func(struct work_struct *work) 7049 { 7050 7051 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.sq_work.work); 7052 struct sq_work *sq_work = container_of(work, struct sq_work, work); 7053 7054 gfx_v8_0_parse_sq_irq(adev, sq_work->ih_data); 7055 } 7056 7057 static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, 7058 struct amdgpu_irq_src *source, 7059 struct amdgpu_iv_entry *entry) 7060 { 7061 unsigned ih_data = entry->src_data[0]; 7062 7063 /* 7064 * Try to submit work so SQ_EDC_INFO can be accessed from 7065 * BH. If previous work submission hasn't finished yet 7066 * just print whatever info is possible directly from the ISR. 7067 */ 7068 if (work_pending(&adev->gfx.sq_work.work)) { 7069 gfx_v8_0_parse_sq_irq(adev, ih_data); 7070 } else { 7071 adev->gfx.sq_work.ih_data = ih_data; 7072 schedule_work(&adev->gfx.sq_work.work); 7073 } 7074 7075 return 0; 7076 } 7077 7078 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, 7079 struct amdgpu_irq_src *src, 7080 unsigned int type, 7081 enum amdgpu_interrupt_state state) 7082 { 7083 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 7084 7085 switch (type) { 7086 case AMDGPU_CP_KIQ_IRQ_DRIVER0: 7087 WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, 7088 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7089 if (ring->me == 1) 7090 WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL, 7091 ring->pipe, 7092 GENERIC2_INT_ENABLE, 7093 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7094 else 7095 WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL, 7096 ring->pipe, 7097 GENERIC2_INT_ENABLE, 7098 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); 7099 break; 7100 default: 7101 BUG(); /* kiq only support GENERIC2_INT now */ 7102 break; 7103 } 7104 return 0; 7105 } 7106 7107 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, 7108 struct amdgpu_irq_src *source, 7109 struct amdgpu_iv_entry *entry) 7110 { 7111 u8 me_id, pipe_id, queue_id; 7112 struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); 7113 7114 me_id = (entry->ring_id & 0x0c) >> 2; 7115 pipe_id = (entry->ring_id & 0x03) >> 0; 7116 queue_id = (entry->ring_id & 0x70) >> 4; 7117 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", 7118 me_id, pipe_id, queue_id); 7119 7120 amdgpu_fence_process(ring); 7121 return 0; 7122 } 7123 7124 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 7125 .name = "gfx_v8_0", 7126 .early_init = gfx_v8_0_early_init, 7127 .late_init = gfx_v8_0_late_init, 7128 .sw_init = gfx_v8_0_sw_init, 7129 .sw_fini = gfx_v8_0_sw_fini, 7130 .hw_init = gfx_v8_0_hw_init, 7131 .hw_fini = gfx_v8_0_hw_fini, 7132 .suspend = gfx_v8_0_suspend, 7133 .resume = gfx_v8_0_resume, 7134 .is_idle = gfx_v8_0_is_idle, 7135 .wait_for_idle = gfx_v8_0_wait_for_idle, 7136 .check_soft_reset = gfx_v8_0_check_soft_reset, 7137 .pre_soft_reset = gfx_v8_0_pre_soft_reset, 7138 .soft_reset = gfx_v8_0_soft_reset, 7139 .post_soft_reset = gfx_v8_0_post_soft_reset, 7140 .set_clockgating_state = gfx_v8_0_set_clockgating_state, 7141 .set_powergating_state = gfx_v8_0_set_powergating_state, 7142 .get_clockgating_state = gfx_v8_0_get_clockgating_state, 7143 }; 7144 7145 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 7146 .type = AMDGPU_RING_TYPE_GFX, 7147 .align_mask = 0xff, 7148 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7149 .support_64bit_ptrs = false, 7150 .get_rptr = gfx_v8_0_ring_get_rptr, 7151 .get_wptr = gfx_v8_0_ring_get_wptr_gfx, 7152 .set_wptr = gfx_v8_0_ring_set_wptr_gfx, 7153 .emit_frame_size = /* maximum 215dw if count 16 IBs in */ 7154 5 + /* COND_EXEC */ 7155 7 + /* PIPELINE_SYNC */ 7156 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */ 7157 8 + /* FENCE for VM_FLUSH */ 7158 20 + /* GDS switch */ 7159 4 + /* double SWITCH_BUFFER, 7160 the first COND_EXEC jump to the place just 7161 prior to this double SWITCH_BUFFER */ 7162 5 + /* COND_EXEC */ 7163 7 + /* HDP_flush */ 7164 4 + /* VGT_flush */ 7165 14 + /* CE_META */ 7166 31 + /* DE_META */ 7167 3 + /* CNTX_CTRL */ 7168 5 + /* HDP_INVL */ 7169 8 + 8 + /* FENCE x2 */ 7170 2, /* SWITCH_BUFFER */ 7171 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ 7172 .emit_ib = gfx_v8_0_ring_emit_ib_gfx, 7173 .emit_fence = gfx_v8_0_ring_emit_fence_gfx, 7174 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7175 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7176 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7177 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7178 .test_ring = gfx_v8_0_ring_test_ring, 7179 .test_ib = gfx_v8_0_ring_test_ib, 7180 .insert_nop = amdgpu_ring_insert_nop, 7181 .pad_ib = amdgpu_ring_generic_pad_ib, 7182 .emit_switch_buffer = gfx_v8_ring_emit_sb, 7183 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, 7184 .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, 7185 .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, 7186 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7187 }; 7188 7189 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 7190 .type = AMDGPU_RING_TYPE_COMPUTE, 7191 .align_mask = 0xff, 7192 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7193 .support_64bit_ptrs = false, 7194 .get_rptr = gfx_v8_0_ring_get_rptr, 7195 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7196 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7197 .emit_frame_size = 7198 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7199 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7200 5 + /* hdp_invalidate */ 7201 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7202 VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 7203 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 7204 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7205 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7206 .emit_fence = gfx_v8_0_ring_emit_fence_compute, 7207 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync, 7208 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, 7209 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, 7210 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, 7211 .test_ring = gfx_v8_0_ring_test_ring, 7212 .test_ib = gfx_v8_0_ring_test_ib, 7213 .insert_nop = amdgpu_ring_insert_nop, 7214 .pad_ib = amdgpu_ring_generic_pad_ib, 7215 .set_priority = gfx_v8_0_ring_set_priority_compute, 7216 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7217 }; 7218 7219 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 7220 .type = AMDGPU_RING_TYPE_KIQ, 7221 .align_mask = 0xff, 7222 .nop = PACKET3(PACKET3_NOP, 0x3FFF), 7223 .support_64bit_ptrs = false, 7224 .get_rptr = gfx_v8_0_ring_get_rptr, 7225 .get_wptr = gfx_v8_0_ring_get_wptr_compute, 7226 .set_wptr = gfx_v8_0_ring_set_wptr_compute, 7227 .emit_frame_size = 7228 20 + /* gfx_v8_0_ring_emit_gds_switch */ 7229 7 + /* gfx_v8_0_ring_emit_hdp_flush */ 7230 5 + /* hdp_invalidate */ 7231 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7232 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7233 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ 7234 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ 7235 .emit_ib = gfx_v8_0_ring_emit_ib_compute, 7236 .emit_fence = gfx_v8_0_ring_emit_fence_kiq, 7237 .test_ring = gfx_v8_0_ring_test_ring, 7238 .test_ib = gfx_v8_0_ring_test_ib, 7239 .insert_nop = amdgpu_ring_insert_nop, 7240 .pad_ib = amdgpu_ring_generic_pad_ib, 7241 .emit_rreg = gfx_v8_0_ring_emit_rreg, 7242 .emit_wreg = gfx_v8_0_ring_emit_wreg, 7243 }; 7244 7245 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 7246 { 7247 int i; 7248 7249 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq; 7250 7251 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7252 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 7253 7254 for (i = 0; i < adev->gfx.num_compute_rings; i++) 7255 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute; 7256 } 7257 7258 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = { 7259 .set = gfx_v8_0_set_eop_interrupt_state, 7260 .process = gfx_v8_0_eop_irq, 7261 }; 7262 7263 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = { 7264 .set = gfx_v8_0_set_priv_reg_fault_state, 7265 .process = gfx_v8_0_priv_reg_irq, 7266 }; 7267 7268 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { 7269 .set = gfx_v8_0_set_priv_inst_fault_state, 7270 .process = gfx_v8_0_priv_inst_irq, 7271 }; 7272 7273 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { 7274 .set = gfx_v8_0_kiq_set_interrupt_state, 7275 .process = gfx_v8_0_kiq_irq, 7276 }; 7277 7278 static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = { 7279 .set = gfx_v8_0_set_cp_ecc_int_state, 7280 .process = gfx_v8_0_cp_ecc_error_irq, 7281 }; 7282 7283 static const struct amdgpu_irq_src_funcs gfx_v8_0_sq_irq_funcs = { 7284 .set = gfx_v8_0_set_sq_int_state, 7285 .process = gfx_v8_0_sq_irq, 7286 }; 7287 7288 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 7289 { 7290 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7291 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs; 7292 7293 adev->gfx.priv_reg_irq.num_types = 1; 7294 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs; 7295 7296 adev->gfx.priv_inst_irq.num_types = 1; 7297 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 7298 7299 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; 7300 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; 7301 7302 adev->gfx.cp_ecc_error_irq.num_types = 1; 7303 adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs; 7304 7305 adev->gfx.sq_irq.num_types = 1; 7306 adev->gfx.sq_irq.funcs = &gfx_v8_0_sq_irq_funcs; 7307 } 7308 7309 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 7310 { 7311 adev->gfx.rlc.funcs = &iceland_rlc_funcs; 7312 } 7313 7314 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) 7315 { 7316 /* init asci gds info */ 7317 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE); 7318 adev->gds.gws.total_size = 64; 7319 adev->gds.oa.total_size = 16; 7320 7321 if (adev->gds.mem.total_size == 64 * 1024) { 7322 adev->gds.mem.gfx_partition_size = 4096; 7323 adev->gds.mem.cs_partition_size = 4096; 7324 7325 adev->gds.gws.gfx_partition_size = 4; 7326 adev->gds.gws.cs_partition_size = 4; 7327 7328 adev->gds.oa.gfx_partition_size = 4; 7329 adev->gds.oa.cs_partition_size = 1; 7330 } else { 7331 adev->gds.mem.gfx_partition_size = 1024; 7332 adev->gds.mem.cs_partition_size = 1024; 7333 7334 adev->gds.gws.gfx_partition_size = 16; 7335 adev->gds.gws.cs_partition_size = 16; 7336 7337 adev->gds.oa.gfx_partition_size = 4; 7338 adev->gds.oa.cs_partition_size = 4; 7339 } 7340 } 7341 7342 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, 7343 u32 bitmap) 7344 { 7345 u32 data; 7346 7347 if (!bitmap) 7348 return; 7349 7350 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; 7351 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; 7352 7353 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data); 7354 } 7355 7356 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) 7357 { 7358 u32 data, mask; 7359 7360 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | 7361 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); 7362 7363 mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); 7364 7365 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; 7366 } 7367 7368 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) 7369 { 7370 int i, j, k, counter, active_cu_number = 0; 7371 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; 7372 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; 7373 unsigned disable_masks[4 * 2]; 7374 u32 ao_cu_num; 7375 7376 memset(cu_info, 0, sizeof(*cu_info)); 7377 7378 if (adev->flags & AMD_IS_APU) 7379 ao_cu_num = 2; 7380 else 7381 ao_cu_num = adev->gfx.config.max_cu_per_sh; 7382 7383 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); 7384 7385 mutex_lock(&adev->grbm_idx_mutex); 7386 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { 7387 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { 7388 mask = 1; 7389 ao_bitmap = 0; 7390 counter = 0; 7391 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff); 7392 if (i < 4 && j < 2) 7393 gfx_v8_0_set_user_cu_inactive_bitmap( 7394 adev, disable_masks[i * 2 + j]); 7395 bitmap = gfx_v8_0_get_cu_active_bitmap(adev); 7396 cu_info->bitmap[i][j] = bitmap; 7397 7398 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { 7399 if (bitmap & mask) { 7400 if (counter < ao_cu_num) 7401 ao_bitmap |= mask; 7402 counter ++; 7403 } 7404 mask <<= 1; 7405 } 7406 active_cu_number += counter; 7407 if (i < 2 && j < 2) 7408 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); 7409 cu_info->ao_cu_bitmap[i][j] = ao_bitmap; 7410 } 7411 } 7412 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); 7413 mutex_unlock(&adev->grbm_idx_mutex); 7414 7415 cu_info->number = active_cu_number; 7416 cu_info->ao_cu_mask = ao_cu_mask; 7417 cu_info->simd_per_cu = NUM_SIMD_PER_CU; 7418 cu_info->max_waves_per_simd = 10; 7419 cu_info->max_scratch_slots_per_cu = 32; 7420 cu_info->wave_front_size = 64; 7421 cu_info->lds_size = 64; 7422 } 7423 7424 const struct amdgpu_ip_block_version gfx_v8_0_ip_block = 7425 { 7426 .type = AMD_IP_BLOCK_TYPE_GFX, 7427 .major = 8, 7428 .minor = 0, 7429 .rev = 0, 7430 .funcs = &gfx_v8_0_ip_funcs, 7431 }; 7432 7433 const struct amdgpu_ip_block_version gfx_v8_1_ip_block = 7434 { 7435 .type = AMD_IP_BLOCK_TYPE_GFX, 7436 .major = 8, 7437 .minor = 1, 7438 .rev = 0, 7439 .funcs = &gfx_v8_0_ip_funcs, 7440 }; 7441 7442 static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring) 7443 { 7444 uint64_t ce_payload_addr; 7445 int cnt_ce; 7446 union { 7447 struct vi_ce_ib_state regular; 7448 struct vi_ce_ib_state_chained_ib chained; 7449 } ce_payload = {}; 7450 7451 if (ring->adev->virt.chained_ib_support) { 7452 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) + 7453 offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); 7454 cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; 7455 } else { 7456 ce_payload_addr = amdgpu_csa_vaddr(ring->adev) + 7457 offsetof(struct vi_gfx_meta_data, ce_payload); 7458 cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; 7459 } 7460 7461 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); 7462 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | 7463 WRITE_DATA_DST_SEL(8) | 7464 WR_CONFIRM) | 7465 WRITE_DATA_CACHE_POLICY(0)); 7466 amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr)); 7467 amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr)); 7468 amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); 7469 } 7470 7471 static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring) 7472 { 7473 uint64_t de_payload_addr, gds_addr, csa_addr; 7474 int cnt_de; 7475 union { 7476 struct vi_de_ib_state regular; 7477 struct vi_de_ib_state_chained_ib chained; 7478 } de_payload = {}; 7479 7480 csa_addr = amdgpu_csa_vaddr(ring->adev); 7481 gds_addr = csa_addr + 4096; 7482 if (ring->adev->virt.chained_ib_support) { 7483 de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); 7484 de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr); 7485 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, de_payload); 7486 cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2; 7487 } else { 7488 de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr); 7489 de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr); 7490 de_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, de_payload); 7491 cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2; 7492 } 7493 7494 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de)); 7495 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | 7496 WRITE_DATA_DST_SEL(8) | 7497 WR_CONFIRM) | 7498 WRITE_DATA_CACHE_POLICY(0)); 7499 amdgpu_ring_write(ring, lower_32_bits(de_payload_addr)); 7500 amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); 7501 amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); 7502 } 7503