/*
 * Copyright 2021 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#define SWSMU_CODE_LAYER_L2

#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_smu.h"
#include "atomfirmware.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_atombios.h"
#include "smu_v13_0_6_pmfw.h"
#include "smu13_driver_if_v13_0_6.h"
#include "smu_v13_0_6_ppsmc.h"
#include "soc15_common.h"
#include "atom.h"
#include "power_state.h"
#include "smu_v13_0.h"
#include "smu_v13_0_6_ppt.h"
#include "nbio/nbio_7_4_offset.h"
#include "nbio/nbio_7_4_sh_mask.h"
#include "thm/thm_11_0_2_offset.h"
#include "thm/thm_11_0_2_sh_mask.h"
#include "amdgpu_xgmi.h"
#include <linux/pci.h>
#include "amdgpu_ras.h"
#include "amdgpu_mca.h"
#include "amdgpu_aca.h"
#include "smu_cmn.h"
#include "mp/mp_13_0_6_offset.h"
#include "mp/mp_13_0_6_sh_mask.h"
#include "umc_v12_0.h"

#undef MP1_Public
#undef smnMP1_FIRMWARE_FLAGS

/* TODO: Check final register offsets */
#define MP1_Public			0x03b00000
#define smnMP1_FIRMWARE_FLAGS		0x3010028
/*
 * DO NOT use these for err/warn/info/debug messages.
 * Use dev_err, dev_warn, dev_info and dev_dbg instead.
 * They are more MGPU friendly.
 */
#undef pr_err
#undef pr_warn
#undef pr_info
#undef pr_debug

MODULE_FIRMWARE("amdgpu/smu_13_0_6.bin");
MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin");

#define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c))

#define SMU_13_0_6_FEA_MAP(smu_feature, smu_13_0_6_feature) \
	[smu_feature] = { 1, (smu_13_0_6_feature) }

#define FEATURE_MASK(feature) (1ULL << feature)
#define SMC_DPM_FEATURE \
	(FEATURE_MASK(FEATURE_DATA_CALCULATION) | \
	 FEATURE_MASK(FEATURE_DPM_GFXCLK) | FEATURE_MASK(FEATURE_DPM_UCLK) | \
	 FEATURE_MASK(FEATURE_DPM_SOCCLK) | FEATURE_MASK(FEATURE_DPM_FCLK) | \
	 FEATURE_MASK(FEATURE_DPM_LCLK) | FEATURE_MASK(FEATURE_DPM_XGMI) | \
	 FEATURE_MASK(FEATURE_DPM_VCN))

/* possible frequency drift (1Mhz) */
#define EPSILON 1

#define smnPCIE_ESM_CTRL 0x93D0
#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1a340288
#define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK 0x00000070L
#define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT 0x4
#define MAX_LINK_WIDTH 6

#define smnPCIE_LC_SPEED_CNTL 0x1a340290
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xE0
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5
#define LINK_SPEED_MAX 4
#define SMU_13_0_6_DSCLK_THRESHOLD 140

#define MCA_BANK_IPID(_ip, _hwid, _type) \
	[AMDGPU_MCA_IP_##_ip] = { .hwid = _hwid, .mcatype = _type, }

struct mca_bank_ipid {
	enum amdgpu_mca_ip ip;
	uint16_t hwid;
	uint16_t mcatype;
};

struct mca_ras_info {
	enum amdgpu_ras_block blkid;
	enum amdgpu_mca_ip ip;
	int *err_code_array;
	int err_code_count;
	int (*get_err_count)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
			     enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count);
	bool (*bank_is_valid)(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev,
			      enum amdgpu_mca_error_type type, struct mca_bank_entry *entry);
};

#define P2S_TABLE_ID_A 0x50325341
#define P2S_TABLE_ID_X 0x50325358
#define P2S_TABLE_ID_3 0x50325303

// clang-format off
static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
	MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0),
	MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1),
	MSG_MAP(GetDriverIfVersion, PPSMC_MSG_GetDriverIfVersion, 1),
	MSG_MAP(EnableAllSmuFeatures, PPSMC_MSG_EnableAllSmuFeatures, 0),
	MSG_MAP(DisableAllSmuFeatures, PPSMC_MSG_DisableAllSmuFeatures, 0),
	MSG_MAP(RequestI2cTransaction, PPSMC_MSG_RequestI2cTransaction, 0),
	MSG_MAP(GetMetricsTable, PPSMC_MSG_GetMetricsTable, 1),
	MSG_MAP(GetMetricsVersion, PPSMC_MSG_GetMetricsVersion, 1),
	MSG_MAP(GetEnabledSmuFeaturesHigh, PPSMC_MSG_GetEnabledSmuFeaturesHigh, 1),
	MSG_MAP(GetEnabledSmuFeaturesLow, PPSMC_MSG_GetEnabledSmuFeaturesLow, 1),
	MSG_MAP(SetDriverDramAddrHigh, PPSMC_MSG_SetDriverDramAddrHigh, 1),
	MSG_MAP(SetDriverDramAddrLow, PPSMC_MSG_SetDriverDramAddrLow, 1),
	MSG_MAP(SetToolsDramAddrHigh, PPSMC_MSG_SetToolsDramAddrHigh, 0),
	MSG_MAP(SetToolsDramAddrLow, PPSMC_MSG_SetToolsDramAddrLow, 0),
	MSG_MAP(SetSoftMinByFreq, PPSMC_MSG_SetSoftMinByFreq, 0),
	MSG_MAP(SetSoftMaxByFreq, PPSMC_MSG_SetSoftMaxByFreq, 1),
	MSG_MAP(GetMinDpmFreq, PPSMC_MSG_GetMinDpmFreq, 1),
	MSG_MAP(GetMaxDpmFreq, PPSMC_MSG_GetMaxDpmFreq, 1),
	MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1),
	MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 1),
	MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1),
	MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK),
	MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0),
	MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0),
	MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0),
	MSG_MAP(GetDebugData, PPSMC_MSG_GetDebugData, 0),
	MSG_MAP(SetNumBadHbmPagesRetired, PPSMC_MSG_SetNumBadHbmPagesRetired, 0),
	MSG_MAP(DFCstateControl, PPSMC_MSG_DFCstateControl, 0),
	MSG_MAP(GetGmiPwrDnHyst, PPSMC_MSG_GetGmiPwrDnHyst, 0),
	MSG_MAP(SetGmiPwrDnHyst, PPSMC_MSG_SetGmiPwrDnHyst, 0),
	MSG_MAP(GmiPwrDnControl, PPSMC_MSG_GmiPwrDnControl, 0),
	MSG_MAP(EnterGfxoff, PPSMC_MSG_EnterGfxoff, 0),
	MSG_MAP(ExitGfxoff, PPSMC_MSG_ExitGfxoff, 0),
	MSG_MAP(EnableDeterminism, PPSMC_MSG_EnableDeterminism, 0),
	MSG_MAP(DisableDeterminism, PPSMC_MSG_DisableDeterminism, 0),
	MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0),
	MSG_MAP(GetMinGfxclkFrequency, PPSMC_MSG_GetMinGfxDpmFreq, 1),
	MSG_MAP(GetMaxGfxclkFrequency, PPSMC_MSG_GetMaxGfxDpmFreq, 1),
	MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 1),
	MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 1),
	MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareForDriverUnload, 0),
	MSG_MAP(GetCTFLimit, PPSMC_MSG_GetCTFLimit, 0),
	MSG_MAP(GetThermalLimit, PPSMC_MSG_ReadThrottlerLimit, 0),
	MSG_MAP(ClearMcaOnRead, PPSMC_MSG_ClearMcaOnRead, 0),
	MSG_MAP(QueryValidMcaCount, PPSMC_MSG_QueryValidMcaCount, SMU_MSG_RAS_PRI),
	MSG_MAP(QueryValidMcaCeCount, PPSMC_MSG_QueryValidMcaCeCount, SMU_MSG_RAS_PRI),
	MSG_MAP(McaBankDumpDW, PPSMC_MSG_McaBankDumpDW, SMU_MSG_RAS_PRI),
	MSG_MAP(McaBankCeDumpDW, PPSMC_MSG_McaBankCeDumpDW, SMU_MSG_RAS_PRI),
	MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0),
	MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0),
	MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0),
	MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0),
	MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0),
	MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1),
};

// clang-format on
static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
	CLK_MAP(SOCCLK, PPCLK_SOCCLK),
	CLK_MAP(FCLK, PPCLK_FCLK),
	CLK_MAP(UCLK, PPCLK_UCLK),
	CLK_MAP(MCLK, PPCLK_UCLK),
	CLK_MAP(DCLK, PPCLK_DCLK),
	CLK_MAP(VCLK, PPCLK_VCLK),
	CLK_MAP(LCLK, PPCLK_LCLK),
};

static const struct cmn2asic_mapping smu_v13_0_6_feature_mask_map[SMU_FEATURE_COUNT] = {
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DATA_CALCULATIONS_BIT, FEATURE_DATA_CALCULATION),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_GFXCLK_BIT, FEATURE_DPM_GFXCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_UCLK_BIT, FEATURE_DPM_UCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_SOCCLK_BIT, FEATURE_DPM_SOCCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_FCLK_BIT, FEATURE_DPM_FCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_LCLK_BIT, FEATURE_DPM_LCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_VCLK_BIT, FEATURE_DPM_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_DCLK_BIT, FEATURE_DPM_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DPM_XGMI_BIT, FEATURE_DPM_XGMI),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_GFXCLK_BIT, FEATURE_DS_GFXCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_SOCCLK_BIT, FEATURE_DS_SOCCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_LCLK_BIT, FEATURE_DS_LCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_FCLK_BIT, FEATURE_DS_FCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_VCN_DPM_BIT, FEATURE_DPM_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_PPT_BIT, FEATURE_PPT),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_TDC_BIT, FEATURE_TDC),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_APCC_DFLL_BIT, FEATURE_APCC_DFLL),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_MP1_CG_BIT, FEATURE_SMU_CG),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_GFXOFF_BIT, FEATURE_GFXOFF),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_FW_CTF_BIT, FEATURE_FW_CTF),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_THERMAL_BIT, FEATURE_THERMAL),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_XGMI_PER_LINK_PWR_DWN_BIT, FEATURE_XGMI_PER_LINK_PWR_DOWN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DF_CSTATE_BIT, FEATURE_DF_CSTATE),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_VCN_BIT, FEATURE_DS_VCN),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MP1CLK_BIT, FEATURE_DS_MP1CLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MPIOCLK_BIT, FEATURE_DS_MPIOCLK),
	SMU_13_0_6_FEA_MAP(SMU_FEATURE_DS_MP0CLK_BIT, FEATURE_DS_MP0CLK),
};

#define TABLE_PMSTATUSLOG	0
#define TABLE_SMU_METRICS	1
#define TABLE_I2C_COMMANDS	2
#define TABLE_COUNT		3

static const struct cmn2asic_mapping smu_v13_0_6_table_map[SMU_TABLE_COUNT] = {
	TAB_MAP(PMSTATUSLOG),
	TAB_MAP(SMU_METRICS),
	TAB_MAP(I2C_COMMANDS),
};

static const uint8_t smu_v13_0_6_throttler_map[] = {
	[THROTTLER_PPT_BIT]		= (SMU_THROTTLER_PPT0_BIT),
	[THROTTLER_THERMAL_SOCKET_BIT]	= (SMU_THROTTLER_TEMP_GPU_BIT),
	[THROTTLER_THERMAL_HBM_BIT]	= (SMU_THROTTLER_TEMP_MEM_BIT),
	[THROTTLER_THERMAL_VR_BIT]	= (SMU_THROTTLER_TEMP_VR_GFX_BIT),
	[THROTTLER_PROCHOT_BIT]		= (SMU_THROTTLER_PROCHOT_GFX_BIT),
};

#define GET_GPU_METRIC_FIELD(field, version) ((version == METRICS_VERSION_V0) ?\
		(metrics_v0->field) : (metrics_v2->field))
#define GET_METRIC_FIELD(field, version) ((version == METRICS_VERSION_V1) ?\
		(metrics_v1->field) : GET_GPU_METRIC_FIELD(field, version))
#define METRICS_TABLE_SIZE (max3(sizeof(MetricsTableV0_t),\
				 sizeof(MetricsTableV1_t),\
				 sizeof(MetricsTableV2_t)))

struct smu_v13_0_6_dpm_map {
	enum smu_clk_type clk_type;
	uint32_t feature_num;
	struct smu_13_0_dpm_table *dpm_table;
	uint32_t *freq_table;
};

static inline int smu_v13_0_6_get_metrics_version(struct smu_context *smu)
{
	if ((smu->adev->flags & AMD_IS_APU) &&
	    smu->smc_fw_version <= 0x4556900)
		return METRICS_VERSION_V1;
	else if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) ==
		 IP_VERSION(13, 0, 12))
		return METRICS_VERSION_V2;

	return METRICS_VERSION_V0;
}

static inline void smu_v13_0_6_cap_set(struct smu_context *smu,
				       enum smu_v13_0_6_caps cap)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;

	dpm_context->caps |= BIT_ULL(cap);
}

static inline void smu_v13_0_6_cap_clear(struct smu_context *smu,
					 enum smu_v13_0_6_caps cap)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;

	dpm_context->caps &= ~BIT_ULL(cap);
}

bool smu_v13_0_6_cap_supported(struct smu_context *smu,
			       enum smu_v13_0_6_caps cap)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;

	return !!(dpm_context->caps & BIT_ULL(cap));
}

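/*
 * Capability setup: each MP1 version variant below starts from a default
 * capability list and then sets or clears individual caps based on the
 * detected PMFW version (smu->smc_fw_version). smu_v13_0_6_init_caps() also
 * keys some checks off the top byte of that version ("pgm"), which
 * distinguishes firmware program variants.
 */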
static void smu_v13_0_14_init_caps(struct smu_context *smu)
{
	enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM),
						     SMU_CAP(SET_UCLK_MAX),
						     SMU_CAP(DPM_POLICY),
						     SMU_CAP(PCIE_METRICS),
						     SMU_CAP(CTF_LIMIT),
						     SMU_CAP(MCA_DEBUG_MODE),
						     SMU_CAP(RMA_MSG),
						     SMU_CAP(ACA_SYND) };
	uint32_t fw_ver = smu->smc_fw_version;

	for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++)
		smu_v13_0_6_cap_set(smu, default_cap_list[i]);

	if (fw_ver >= 0x05550E00)
		smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS));
	if (fw_ver >= 0x05550B00)
		smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS));
	if (fw_ver >= 0x5551200)
		smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));
	if (fw_ver >= 0x5551800)
		smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));
	if (fw_ver >= 0x5551600) {
		smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS));
		smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE));
		smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
	}
}

static void smu_v13_0_12_init_caps(struct smu_context *smu)
{
	enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM),
						     SMU_CAP(PCIE_METRICS),
						     SMU_CAP(CTF_LIMIT),
						     SMU_CAP(MCA_DEBUG_MODE),
						     SMU_CAP(RMA_MSG),
						     SMU_CAP(ACA_SYND),
						     SMU_CAP(OTHER_END_METRICS),
						     SMU_CAP(PER_INST_METRICS) };
	uint32_t fw_ver = smu->smc_fw_version;

	for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++)
		smu_v13_0_6_cap_set(smu, default_cap_list[i]);

	if (fw_ver < 0x00561900)
		smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM));

	if (fw_ver >= 0x00561700)
		smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));

	if (fw_ver >= 0x00561E00)
		smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS));

	if (fw_ver >= 0x00562500)
		smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS));

	if (fw_ver >= 0x04560100) {
		smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE));
		smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
	}

	if (fw_ver > 0x04560900)
		smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));

	if (fw_ver >= 0x04560D00)
		smu_v13_0_6_cap_set(smu, SMU_CAP(FAST_PPT));

	if (fw_ver >= 0x04560700) {
		if (fw_ver >= 0x04560900) {
			smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS));
			if (smu->adev->gmc.xgmi.physical_node_id == 0)
				smu_v13_0_6_cap_set(smu, SMU_CAP(NPM_METRICS));
		} else if (!amdgpu_sriov_vf(smu->adev))
			smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS));
	} else {
		smu_v13_0_12_tables_fini(smu);
	}
}

static void smu_v13_0_6_init_caps(struct smu_context *smu)
{
	enum smu_v13_0_6_caps default_cap_list[] = { SMU_CAP(DPM),
						     SMU_CAP(SET_UCLK_MAX),
						     SMU_CAP(DPM_POLICY),
						     SMU_CAP(PCIE_METRICS),
						     SMU_CAP(CTF_LIMIT),
						     SMU_CAP(MCA_DEBUG_MODE),
						     SMU_CAP(RMA_MSG),
						     SMU_CAP(ACA_SYND) };
	struct amdgpu_device *adev = smu->adev;
	uint32_t fw_ver = smu->smc_fw_version;
	uint32_t pgm = (fw_ver >> 24) & 0xFF;

	for (int i = 0; i < ARRAY_SIZE(default_cap_list); i++)
		smu_v13_0_6_cap_set(smu, default_cap_list[i]);

	if (fw_ver < 0x552F00)
		smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM));
	if (fw_ver < 0x554500)
		smu_v13_0_6_cap_clear(smu, SMU_CAP(CTF_LIMIT));

	if (adev->flags & AMD_IS_APU) {
		smu_v13_0_6_cap_clear(smu, SMU_CAP(PCIE_METRICS));
		smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM_POLICY));
		smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG));
		smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND));

		if (fw_ver >= 0x04556A00)
			smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS));
	} else {
		if (fw_ver >= 0x557600)
			smu_v13_0_6_cap_set(smu, SMU_CAP(OTHER_END_METRICS));
		if (fw_ver < 0x00556000)
			smu_v13_0_6_cap_clear(smu, SMU_CAP(DPM_POLICY));
		if (amdgpu_sriov_vf(adev) && (fw_ver < 0x556600))
			smu_v13_0_6_cap_clear(smu, SMU_CAP(SET_UCLK_MAX));
		if (fw_ver < 0x556300)
			smu_v13_0_6_cap_clear(smu, SMU_CAP(PCIE_METRICS));
		if (fw_ver < 0x554800)
			smu_v13_0_6_cap_clear(smu, SMU_CAP(MCA_DEBUG_MODE));
		if (fw_ver >= 0x556F00)
			smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS));
		if (fw_ver < 0x00555a00)
			smu_v13_0_6_cap_clear(smu, SMU_CAP(RMA_MSG));
		if (fw_ver < 0x00555600)
			smu_v13_0_6_cap_clear(smu, SMU_CAP(ACA_SYND));
		if ((pgm == 7 && fw_ver >= 0x7550E00) ||
		    (pgm == 0 && fw_ver >= 0x00557E00))
			smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS));

		if (amdgpu_sriov_vf(adev)) {
			if (fw_ver >= 0x00558200)
				amdgpu_virt_attr_set(&adev->virt.virt_caps,
						     AMDGPU_VIRT_CAP_POWER_LIMIT,
						     AMDGPU_CAP_ATTR_RW);
			if ((pgm == 0 && fw_ver >= 0x00558000) ||
			    (pgm == 7 && fw_ver >= 0x7551000)) {
				smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS));
				smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE));
				smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
			}
		} else {
			if ((pgm == 0 && fw_ver >= 0x00557F01) ||
			    (pgm == 7 && fw_ver >= 0x7551000)) {
				smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS));
				smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE));
			}
			if ((pgm == 0 && fw_ver >= 0x00558000) ||
			    (pgm == 7 && fw_ver >= 0x7551000))
				smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
		}
	}
	if (((pgm == 7) && (fw_ver >= 0x7550700)) ||
	    ((pgm == 0) && (fw_ver >= 0x00557900)) ||
	    ((pgm == 4) && (fw_ver >= 0x4557000)))
		smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));

	if ((pgm == 0 && fw_ver >= 0x00558200) ||
	    (pgm == 7 && fw_ver >= 0x07551400))
		smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));
}

static void smu_v13_0_x_init_caps(struct smu_context *smu)
{
	switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
	case IP_VERSION(13, 0, 12):
		return smu_v13_0_12_init_caps(smu);
	case IP_VERSION(13, 0, 14):
		return smu_v13_0_14_init_caps(smu);
	default:
		return smu_v13_0_6_init_caps(smu);
	}
}

static int smu_v13_0_6_check_fw_version(struct smu_context *smu)
{
	int r;

	r = smu_v13_0_check_fw_version(smu);
	/* Initialize caps flags once fw version is fetched */
	if (!r)
		smu_v13_0_x_init_caps(smu);

	return r;
}

static int smu_v13_0_6_init_microcode(struct smu_context *smu)
{
	const struct smc_firmware_header_v2_1 *v2_1;
	const struct common_firmware_header *hdr;
	struct amdgpu_firmware_info *ucode = NULL;
	struct smc_soft_pptable_entry *entries;
	struct amdgpu_device *adev = smu->adev;
	uint32_t p2s_table_id = P2S_TABLE_ID_A;
	int ret = 0, i, p2stable_count;
	int var = (adev->pdev->device & 0xF);
	char ucode_prefix[15];

	/* No need to load P2S tables in IOV mode or for smu v13.0.12 */
	if (amdgpu_sriov_vf(adev) ||
	    (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)))
		return 0;

	if (!(adev->flags & AMD_IS_APU)) {
		p2s_table_id = P2S_TABLE_ID_X;
		if (var == 0x5)
			p2s_table_id = P2S_TABLE_ID_3;
	}

	amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix,
				       sizeof(ucode_prefix));
	ret = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED,
				   "amdgpu/%s.bin", ucode_prefix);
	if (ret)
		goto out;

	hdr = (const struct common_firmware_header *)adev->pm.fw->data;
	amdgpu_ucode_print_smc_hdr(hdr);

	/* SMU v13.0.6 binary file doesn't carry pptables, instead the entries
	 * are used to carry p2s tables.
	 */
	v2_1 = (const struct smc_firmware_header_v2_1 *)adev->pm.fw->data;
	entries = (struct smc_soft_pptable_entry *)((uint8_t *)v2_1 +
						    le32_to_cpu(v2_1->pptable_entry_offset));
	p2stable_count = le32_to_cpu(v2_1->pptable_count);
	for (i = 0; i < p2stable_count; i++) {
		if (le32_to_cpu(entries[i].id) == p2s_table_id) {
			smu->pptable_firmware.data =
				((uint8_t *)v2_1 +
				 le32_to_cpu(entries[i].ppt_offset_bytes));
			smu->pptable_firmware.size =
				le32_to_cpu(entries[i].ppt_size_bytes);
			break;
		}
	}

	if (smu->pptable_firmware.data && smu->pptable_firmware.size) {
		ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE];
		ucode->ucode_id = AMDGPU_UCODE_ID_P2S_TABLE;
		ucode->fw = &smu->pptable_firmware;
		adev->firmware.fw_size += ALIGN(ucode->fw->size, PAGE_SIZE);
	}

	return 0;
out:
	amdgpu_ucode_release(&adev->pm.fw);

	return ret;
}

static int smu_v13_0_6_tables_init(struct smu_context *smu)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	struct smu_table *tables = smu_table->tables;
	struct smu_v13_0_6_gpu_metrics *gpu_metrics;
	void *driver_pptable __free(kfree) = NULL;
	void *metrics_table __free(kfree) = NULL;
	struct amdgpu_device *adev = smu->adev;
	int gpu_metrcs_size = METRICS_TABLE_SIZE;
	int ret;

	if (!(adev->flags & AMD_IS_APU))
		SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE,
			       PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);

	SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS,
		       max(gpu_metrcs_size,
			   smu_v13_0_12_get_max_metrics_size()),
		       PAGE_SIZE,
		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);

	SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t),
		       PAGE_SIZE,
		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);

	SMU_TABLE_INIT(tables, SMU_TABLE_PMFW_SYSTEM_METRICS,
		       smu_v13_0_12_get_system_metrics_size(), PAGE_SIZE,
		       AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);

	metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL);
	if (!metrics_table)
		return -ENOMEM;
	smu_table->metrics_time = 0;

	driver_pptable = kzalloc(sizeof(struct PPTable_t), GFP_KERNEL);
	if (!driver_pptable)
		return -ENOMEM;

	ret = smu_table_cache_init(smu, SMU_TABLE_SMU_METRICS,
				   sizeof(struct smu_v13_0_6_gpu_metrics), 1);
	if (ret)
		return ret;

	gpu_metrics = (struct smu_v13_0_6_gpu_metrics
			       *)(tables[SMU_TABLE_SMU_METRICS].cache.buffer);

	smu_v13_0_6_gpu_metrics_init(gpu_metrics, 1, 9);
	if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) ==
	    IP_VERSION(13, 0, 12)) {
		ret = smu_v13_0_12_tables_init(smu);
		if (ret) {
			smu_table_cache_fini(smu, SMU_TABLE_SMU_METRICS);
			return ret;
		}
	}

	smu_table->metrics_table = no_free_ptr(metrics_table);
	smu_table->driver_pptable = no_free_ptr(driver_pptable);

	return 0;
}

static int smu_v13_0_6_select_policy_soc_pstate(struct smu_context *smu,
						int policy)
{
	struct amdgpu_device *adev = smu->adev;
	int ret, param;

	switch (policy) {
	case SOC_PSTATE_DEFAULT:
		param = 0;
		break;
	case SOC_PSTATE_0:
		param = 1;
		break;
	case SOC_PSTATE_1:
		param = 2;
		break;
	case SOC_PSTATE_2:
		param = 3;
		break;
	default:
		return -EINVAL;
	}

	ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetThrottlingPolicy,
					      param, NULL);

	if (ret)
		dev_err(adev->dev, "select soc pstate policy %d failed",
			policy);

	return ret;
}

static int smu_v13_0_6_select_plpd_policy(struct smu_context *smu, int level)
{
	struct amdgpu_device *adev = smu->adev;
	int ret, param;

	switch (level) {
	case XGMI_PLPD_DEFAULT:
		param = PPSMC_PLPD_MODE_DEFAULT;
		break;
	case XGMI_PLPD_OPTIMIZED:
		param = PPSMC_PLPD_MODE_OPTIMIZED;
		break;
	case XGMI_PLPD_DISALLOW:
		param = 0;
		break;
	default:
		return -EINVAL;
	}

	if (level == XGMI_PLPD_DISALLOW)
		ret = smu_cmn_send_smc_msg_with_param(
			smu, SMU_MSG_GmiPwrDnControl, param, NULL);
	else
		/* change xgmi per-link power down policy */
		ret = smu_cmn_send_smc_msg_with_param(
			smu, SMU_MSG_SelectPLPDMode, param, NULL);

	if (ret)
		dev_err(adev->dev,
			"select xgmi per-link power down policy %d failed\n",
			level);

	return ret;
}

static int smu_v13_0_6_allocate_dpm_context(struct smu_context *smu)
{
	struct smu_dpm_context *smu_dpm = &smu->smu_dpm;
	struct smu_dpm_policy *policy;

	smu_dpm->dpm_context =
		kzalloc(sizeof(struct smu_13_0_dpm_context), GFP_KERNEL);
	if (!smu_dpm->dpm_context)
		return -ENOMEM;
	smu_dpm->dpm_context_size = sizeof(struct smu_13_0_dpm_context);

	smu_dpm->dpm_policies =
		kzalloc(sizeof(struct smu_dpm_policy_ctxt), GFP_KERNEL);
	if (!smu_dpm->dpm_policies) {
		kfree(smu_dpm->dpm_context);
		return -ENOMEM;
	}

	if (!(smu->adev->flags & AMD_IS_APU)) {
		policy = &(smu_dpm->dpm_policies->policies[0]);

		policy->policy_type = PP_PM_POLICY_SOC_PSTATE;
		policy->level_mask = BIT(SOC_PSTATE_DEFAULT) |
				     BIT(SOC_PSTATE_0) | BIT(SOC_PSTATE_1) |
				     BIT(SOC_PSTATE_2);
		policy->current_level = SOC_PSTATE_DEFAULT;
		policy->set_policy = smu_v13_0_6_select_policy_soc_pstate;
		smu_cmn_generic_soc_policy_desc(policy);
		smu_dpm->dpm_policies->policy_mask |=
			BIT(PP_PM_POLICY_SOC_PSTATE);
	}
	policy = &(smu_dpm->dpm_policies->policies[1]);

	policy->policy_type = PP_PM_POLICY_XGMI_PLPD;
	policy->level_mask = BIT(XGMI_PLPD_DISALLOW) | BIT(XGMI_PLPD_DEFAULT) |
			     BIT(XGMI_PLPD_OPTIMIZED);
	policy->current_level = XGMI_PLPD_DEFAULT;
	policy->set_policy = smu_v13_0_6_select_plpd_policy;
	smu_cmn_generic_plpd_policy_desc(policy);
	smu_dpm->dpm_policies->policy_mask |= BIT(PP_PM_POLICY_XGMI_PLPD);

	return 0;
}

static int smu_v13_0_6_init_smc_tables(struct smu_context *smu)
{
	int ret = 0;

	ret = smu_v13_0_6_tables_init(smu);
	if (ret)
		return ret;

	ret = smu_v13_0_6_allocate_dpm_context(smu);

	return ret;
}

static int smu_v13_0_6_fini_smc_tables(struct smu_context *smu)
{
	if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12))
		smu_v13_0_12_tables_fini(smu);
	smu_table_cache_fini(smu, SMU_TABLE_SMU_METRICS);
	return smu_v13_0_fini_smc_tables(smu);
}

static int smu_v13_0_6_get_allowed_feature_mask(struct smu_context *smu,
						uint32_t *feature_mask,
						uint32_t num)
{
	if (num > 2)
		return -EINVAL;

	/* pptable will handle the features to enable */
	memset(feature_mask, 0xFF, sizeof(uint32_t) * num);

	return 0;
}

int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table,
				  bool bypass_cache)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size;
	struct smu_table *table = &smu_table->driver_table;
	int ret;

	if (bypass_cache || !smu_table->metrics_time ||
	    time_after(jiffies,
		       smu_table->metrics_time + msecs_to_jiffies(1))) {
		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsTable, NULL);
		if (ret) {
			dev_info(smu->adev->dev,
				 "Failed to export SMU metrics table!\n");
			return ret;
		}

		amdgpu_hdp_invalidate(smu->adev, NULL);
		memcpy(smu_table->metrics_table, table->cpu_addr, table_size);

		smu_table->metrics_time = jiffies;
	}

	if (metrics_table)
		memcpy(metrics_table, smu_table->metrics_table, table_size);

	return 0;
}

static ssize_t smu_v13_0_6_get_pm_metrics(struct smu_context *smu,
					  void *metrics, size_t max_size)
{
	struct smu_table_context *smu_tbl_ctxt = &smu->smu_table;
	uint32_t table_version = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].version;
	uint32_t table_size = smu_tbl_ctxt->tables[SMU_TABLE_SMU_METRICS].size;
	struct amdgpu_pm_metrics *pm_metrics = metrics;
	uint32_t pmfw_version;
	int ret;

	if (!pm_metrics || !max_size)
		return -EINVAL;

	if (max_size < (table_size + sizeof(pm_metrics->common_header)))
		return -EOVERFLOW;

	/* Don't use cached metrics data */
	ret = smu_v13_0_6_get_metrics_table(smu, pm_metrics->data, true);
	if (ret)
		return ret;

	smu_cmn_get_smc_version(smu, NULL, &pmfw_version);

	memset(&pm_metrics->common_header, 0,
	       sizeof(pm_metrics->common_header));
	pm_metrics->common_header.mp1_ip_discovery_version =
		amdgpu_ip_version(smu->adev, MP1_HWIP, 0);
	pm_metrics->common_header.pmfw_version = pmfw_version;
	pm_metrics->common_header.pmmetrics_version = table_version;
	pm_metrics->common_header.structure_size =
		sizeof(pm_metrics->common_header) + table_size;

	return pm_metrics->common_header.structure_size;
}

static void smu_v13_0_6_fill_static_metrics_table(struct smu_context *smu,
						  StaticMetricsTable_t *static_metrics)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;

	if (!static_metrics->InputTelemetryVoltageInmV) {
		dev_warn(smu->adev->dev, "Invalid board voltage %d\n",
			 static_metrics->InputTelemetryVoltageInmV);
	}

	dpm_context->board_volt = static_metrics->InputTelemetryVoltageInmV;

	if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PLDM_VERSION)) &&
	    static_metrics->pldmVersion[0] != 0xFFFFFFFF)
		smu->adev->firmware.pldm_version =
			static_metrics->pldmVersion[0];
}

int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size;
	struct smu_table *table = &smu_table->driver_table;
	int ret;

	ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetStaticMetricsTable, NULL);
	if (ret) {
		dev_info(smu->adev->dev,
			 "Failed to export static metrics table!\n");
		return ret;
	}

	amdgpu_hdp_invalidate(smu->adev, NULL);
	memcpy(smu_table->metrics_table, table->cpu_addr, table_size);

	return 0;
}

static void smu_v13_0_6_update_caps(struct smu_context *smu)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;

	if (smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT)) &&
	    !pptable->PPT1Max)
		smu_v13_0_6_cap_clear(smu, SMU_CAP(FAST_PPT));
}

static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	StaticMetricsTable_t *static_metrics = (StaticMetricsTable_t *)smu_table->metrics_table;
	MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table;
	MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table;
	MetricsTableV2_t *metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;
	int version = smu_v13_0_6_get_metrics_version(smu);
	int ret, i, retry = 100, n;
	uint32_t table_version;
	uint16_t max_speed;
	uint8_t max_width;

	if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) &&
	    smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) {
		ret = smu_v13_0_12_setup_driver_pptable(smu);
		if (ret)
			return ret;
		goto out;
	}

	/* Store one-time values in driver PPTable */
	if (!pptable->Init) {
		while (--retry) {
			ret = smu_v13_0_6_get_metrics_table(smu, NULL, true);
			if (ret)
				return ret;

			/* Ensure that metrics have been updated */
			if (GET_METRIC_FIELD(AccumulationCounter, version))
				break;

			usleep_range(1000, 1100);
		}

		if (!retry)
			return -ETIME;

		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetMetricsVersion,
					   &table_version);
		if (ret)
			return ret;
		smu_table->tables[SMU_TABLE_SMU_METRICS].version =
			table_version;

		pptable->MaxSocketPowerLimit =
			SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketPowerLimit, version));
		pptable->MaxGfxclkFrequency =
			SMUQ10_ROUND(GET_METRIC_FIELD(MaxGfxclkFrequency, version));
		pptable->MinGfxclkFrequency =
			SMUQ10_ROUND(GET_METRIC_FIELD(MinGfxclkFrequency, version));
		max_width = (uint8_t)GET_METRIC_FIELD(XgmiWidth, version);
		max_speed = (uint16_t)GET_METRIC_FIELD(XgmiBitrate, version);
		amgpu_xgmi_set_max_speed_width(smu->adev, max_speed, max_width);

		for (i = 0; i < 4; ++i) {
			pptable->FclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequencyTable, version)[i]);
			pptable->UclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequencyTable, version)[i]);
			pptable->SocclkFrequencyTable[i] = SMUQ10_ROUND(
				GET_METRIC_FIELD(SocclkFrequencyTable, version)[i]);
			pptable->VclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequencyTable, version)[i]);
			pptable->DclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequencyTable, version)[i]);
			pptable->LclkFrequencyTable[i] =
				SMUQ10_ROUND(GET_METRIC_FIELD(LclkFrequencyTable, version)[i]);
		}

		/* use AID0 serial number by default */
		pptable->PublicSerialNumber_AID =
			GET_METRIC_FIELD(PublicSerialNumber_AID, version)[0];

		amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC,
				      0, pptable->PublicSerialNumber_AID);
		n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_AID);
		for (i = 0; i < n; i++) {
			amdgpu_device_set_uid(
				smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i,
				GET_METRIC_FIELD(PublicSerialNumber_AID,
						 version)[i]);
		}
		n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_XCD);
		for (i = 0; i < n; i++) {
			amdgpu_device_set_uid(
				smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i,
				GET_METRIC_FIELD(PublicSerialNumber_XCD,
						 version)[i]);
		}

		pptable->Init = true;
		if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) {
			ret = smu_v13_0_6_get_static_metrics_table(smu);
			if (ret)
				return ret;
			smu_v13_0_6_fill_static_metrics_table(smu, static_metrics);
		}
	}
out:
	smu_v13_0_6_update_caps(smu);
	return 0;
}

static int smu_v13_0_6_get_dpm_ultimate_freq(struct smu_context *smu,
					     enum smu_clk_type clk_type,
					     uint32_t *min, uint32_t *max)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	struct smu_table_context *smu_table = &smu->smu_table;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;
	struct smu_13_0_dpm_table *dpm_table;
	uint32_t min_clk, max_clk, param;
	int ret = 0, clk_id = 0;

	/* Use dpm tables, if data is already fetched */
	if (pptable->Init) {
		switch (clk_type) {
		case SMU_MCLK:
		case SMU_UCLK:
			dpm_table = &dpm_context->dpm_tables.uclk_table;
			break;
		case SMU_GFXCLK:
		case SMU_SCLK:
			dpm_table = &dpm_context->dpm_tables.gfx_table;
			break;
		case SMU_SOCCLK:
			dpm_table = &dpm_context->dpm_tables.soc_table;
			break;
		case SMU_FCLK:
			dpm_table = &dpm_context->dpm_tables.fclk_table;
			break;
		case SMU_VCLK:
			dpm_table = &dpm_context->dpm_tables.vclk_table;
			break;
		case SMU_DCLK:
			dpm_table = &dpm_context->dpm_tables.dclk_table;
			break;
		default:
			return -EINVAL;
		}

		min_clk = dpm_table->min;
		max_clk = dpm_table->max;

		if (min)
			*min = min_clk;
		if (max)
			*max = max_clk;

		if (min_clk && max_clk)
			return 0;
	}

	if (!(clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)) {
		clk_id = smu_cmn_to_asic_specific_index(
			smu, CMN2ASIC_MAPPING_CLK, clk_type);
		if (clk_id < 0) {
			ret = -EINVAL;
			goto failed;
		}
		param = (clk_id & 0xffff) << 16;
	}

	if (max) {
		if (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)
			ret = smu_cmn_send_smc_msg(
				smu, SMU_MSG_GetMaxGfxclkFrequency, max);
		else
			ret = smu_cmn_send_smc_msg_with_param(
				smu, SMU_MSG_GetMaxDpmFreq, param, max);
		if (ret)
			goto failed;
	}

	if (min) {
		if (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK)
			ret = smu_cmn_send_smc_msg(
				smu, SMU_MSG_GetMinGfxclkFrequency, min);
		else
			ret = smu_cmn_send_smc_msg_with_param(
				smu, SMU_MSG_GetMinDpmFreq, param, min);
	}

failed:
	return ret;
}

static int smu_v13_0_6_get_dpm_level_count(struct smu_context *smu,
					   enum smu_clk_type clk_type,
					   uint32_t *levels)
{
	int ret;

	ret = smu_v13_0_get_dpm_freq_by_index(smu, clk_type, 0xff, levels);
	if (!ret)
		++(*levels);

	return ret;
}

static void smu_v13_0_6_pm_policy_init(struct smu_context *smu)
{
	struct smu_dpm_policy *policy;

	policy = smu_get_pm_policy(smu, PP_PM_POLICY_SOC_PSTATE);
	if (policy)
		policy->current_level = SOC_PSTATE_DEFAULT;
}

static int smu_v13_0_6_set_default_dpm_table(struct smu_context *smu)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	struct smu_table_context *smu_table = &smu->smu_table;
	struct smu_13_0_dpm_table *dpm_table = NULL;
	struct PPTable_t *pptable =
		(struct PPTable_t *)smu_table->driver_pptable;
	uint32_t gfxclkmin, gfxclkmax, levels;
	int ret = 0, i, j;
	struct smu_v13_0_6_dpm_map dpm_map[] = {
		{ SMU_SOCCLK, SMU_FEATURE_DPM_SOCCLK_BIT,
		  &dpm_context->dpm_tables.soc_table,
		  pptable->SocclkFrequencyTable },
		{ SMU_UCLK, SMU_FEATURE_DPM_UCLK_BIT,
		  &dpm_context->dpm_tables.uclk_table,
		  pptable->UclkFrequencyTable },
		{ SMU_FCLK, SMU_FEATURE_DPM_FCLK_BIT,
		  &dpm_context->dpm_tables.fclk_table,
		  pptable->FclkFrequencyTable },
		{ SMU_VCLK, SMU_FEATURE_DPM_VCLK_BIT,
		  &dpm_context->dpm_tables.vclk_table,
		  pptable->VclkFrequencyTable },
		{ SMU_DCLK, SMU_FEATURE_DPM_DCLK_BIT,
		  &dpm_context->dpm_tables.dclk_table,
		  pptable->DclkFrequencyTable },
	};

	smu_v13_0_6_setup_driver_pptable(smu);

	/* DPM policy not supported in older firmwares */
	if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM_POLICY))) {
		struct smu_dpm_context *smu_dpm = &smu->smu_dpm;

		smu_dpm->dpm_policies->policy_mask &=
			~BIT(PP_PM_POLICY_SOC_PSTATE);
	}

	smu_v13_0_6_pm_policy_init(smu);
	/* gfxclk dpm table setup */
	dpm_table = &dpm_context->dpm_tables.gfx_table;
	if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT)) {
		/* In the case of gfxclk, only fine-grained dpm is honored.
		 * Get min/max values from FW.
		 */
		ret = smu_v13_0_6_get_dpm_ultimate_freq(smu, SMU_GFXCLK,
							&gfxclkmin, &gfxclkmax);
		if (ret)
			return ret;

		dpm_table->count = 2;
		dpm_table->dpm_levels[0].value = gfxclkmin;
		dpm_table->dpm_levels[0].enabled = true;
		dpm_table->dpm_levels[1].value = gfxclkmax;
		dpm_table->dpm_levels[1].enabled = true;
		dpm_table->min = dpm_table->dpm_levels[0].value;
		dpm_table->max = dpm_table->dpm_levels[1].value;
	} else {
		dpm_table->count = 1;
		dpm_table->dpm_levels[0].value = pptable->MinGfxclkFrequency;
		dpm_table->dpm_levels[0].enabled = true;
		dpm_table->min = dpm_table->dpm_levels[0].value;
		dpm_table->max = dpm_table->dpm_levels[0].value;
	}

	for (j = 0; j < ARRAY_SIZE(dpm_map); j++) {
		dpm_table = dpm_map[j].dpm_table;
		levels = 1;
		if (smu_cmn_feature_is_enabled(smu, dpm_map[j].feature_num)) {
			ret = smu_v13_0_6_get_dpm_level_count(
				smu, dpm_map[j].clk_type, &levels);
			if (ret)
				return ret;
		}
		dpm_table->count = levels;
		for (i = 0; i < dpm_table->count; ++i) {
			dpm_table->dpm_levels[i].value =
				dpm_map[j].freq_table[i];
			dpm_table->dpm_levels[i].enabled = true;

		}
		dpm_table->min = dpm_table->dpm_levels[0].value;
		dpm_table->max = dpm_table->dpm_levels[levels - 1].value;

	}

	return 0;
}

static int smu_v13_0_6_setup_pptable(struct smu_context *smu)
{
	struct smu_table_context *table_context = &smu->smu_table;

	/* TODO: PPTable is not available.
	 * 1) Find an alternate way to get 'PPTable values' here.
	 * 2) Check if there is SW CTF
	 */
	table_context->thermal_controller_type = 0;

	return 0;
}

static int smu_v13_0_6_check_fw_status(struct smu_context *smu)
{
	struct amdgpu_device *adev = smu->adev;
	uint32_t mp1_fw_flags;

	mp1_fw_flags =
		RREG32_PCIE(MP1_Public | (smnMP1_FIRMWARE_FLAGS & 0xffffffff));

	if ((mp1_fw_flags & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >>
	    MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT)
		return 0;

	return -EIO;
}

static int smu_v13_0_6_populate_umd_state_clk(struct smu_context *smu)
{
	struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
	struct smu_13_0_dpm_table *gfx_table =
		&dpm_context->dpm_tables.gfx_table;
	struct smu_13_0_dpm_table *mem_table =
		&dpm_context->dpm_tables.uclk_table;
	struct smu_13_0_dpm_table *soc_table =
		&dpm_context->dpm_tables.soc_table;
	struct smu_umd_pstate_table *pstate_table = &smu->pstate_table;

	pstate_table->gfxclk_pstate.min = gfx_table->min;
	pstate_table->gfxclk_pstate.peak = gfx_table->max;
	pstate_table->gfxclk_pstate.curr.min = gfx_table->min;
	pstate_table->gfxclk_pstate.curr.max = gfx_table->max;

	pstate_table->uclk_pstate.min = mem_table->min;
	pstate_table->uclk_pstate.peak = mem_table->max;
	pstate_table->uclk_pstate.curr.min = mem_table->min;
	pstate_table->uclk_pstate.curr.max = mem_table->max;

	pstate_table->socclk_pstate.min = soc_table->min;
	pstate_table->socclk_pstate.peak = soc_table->max;
	pstate_table->socclk_pstate.curr.min = soc_table->min;
	pstate_table->socclk_pstate.curr.max = soc_table->max;

	if (gfx_table->count > SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL &&
	    mem_table->count > SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL &&
	    soc_table->count > SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL) {
		pstate_table->gfxclk_pstate.standard =
			gfx_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_GFXCLK_LEVEL].value;
		pstate_table->uclk_pstate.standard =
			mem_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_MCLK_LEVEL].value;
		pstate_table->socclk_pstate.standard =
			soc_table->dpm_levels[SMU_13_0_6_UMD_PSTATE_SOCCLK_LEVEL].value;
	} else {
		pstate_table->gfxclk_pstate.standard =
			pstate_table->gfxclk_pstate.min;
		pstate_table->uclk_pstate.standard =
			pstate_table->uclk_pstate.min;
		pstate_table->socclk_pstate.standard =
			pstate_table->socclk_pstate.min;
	}

	return 0;
}

static int smu_v13_0_6_get_clk_table(struct smu_context *smu,
				     struct pp_clock_levels_with_latency *clocks,
				     struct smu_13_0_dpm_table *dpm_table)
{
	int i, count;

	count = (dpm_table->count > MAX_NUM_CLOCKS) ? MAX_NUM_CLOCKS :
						      dpm_table->count;
	clocks->num_levels = count;

	for (i = 0; i < count; i++) {
		clocks->data[i].clocks_in_khz =
			dpm_table->dpm_levels[i].value * 1000;
		clocks->data[i].latency_in_us = 0;
	}

	return 0;
}

static int smu_v13_0_6_freqs_in_same_level(int32_t frequency1,
					   int32_t frequency2)
{
	return (abs(frequency1 - frequency2) <= EPSILON);
}

static uint32_t smu_v13_0_6_get_throttler_status(struct smu_context *smu)
{
	struct smu_power_context *smu_power = &smu->smu_power;
	struct smu_13_0_power_context *power_context = smu_power->power_context;
	uint32_t throttler_status = 0;

	throttler_status = atomic_read(&power_context->throttle_status);
	dev_dbg(smu->adev->dev, "SMU Throttler status: %u", throttler_status);

	return throttler_status;
}

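/*
 * Most fields in the PMFW metrics table are Q10 fixed-point values;
 * SMUQ10_ROUND() below converts them to plain integer units before they are
 * reported through the SMU metrics interfaces.
 */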
static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
					    MetricsMember_t member,
					    uint32_t *value)
{
	struct smu_table_context *smu_table = &smu->smu_table;
	MetricsTableV0_t *metrics_v0 = (MetricsTableV0_t *)smu_table->metrics_table;
	MetricsTableV1_t *metrics_v1 = (MetricsTableV1_t *)smu_table->metrics_table;
	MetricsTableV2_t *metrics_v2 = (MetricsTableV2_t *)smu_table->metrics_table;
	int version = smu_v13_0_6_get_metrics_version(smu);
	struct amdgpu_device *adev = smu->adev;
	int ret = 0;
	int xcc_id;

	ret = smu_v13_0_6_get_metrics_table(smu, NULL, false);
	if (ret)
		return ret;

	if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) &&
	    smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS)))
		return smu_v13_0_12_get_smu_metrics_data(smu, member, value);

	/* For clocks with multiple instances, only report the first one */
	switch (member) {
	case METRICS_CURR_GFXCLK:
	case METRICS_AVERAGE_GFXCLK:
		if (smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) {
			xcc_id = GET_INST(GC, 0);
			*value = SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, version)[xcc_id]);
		} else {
			*value = 0;
		}
		break;
	case METRICS_CURR_SOCCLK:
	case METRICS_AVERAGE_SOCCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, version)[0]);
		break;
	case METRICS_CURR_UCLK:
	case METRICS_AVERAGE_UCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version));
		break;
	case METRICS_CURR_VCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, version)[0]);
		break;
	case METRICS_CURR_DCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, version)[0]);
		break;
	case METRICS_CURR_FCLK:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(FclkFrequency, version));
		break;
	case METRICS_AVERAGE_GFXACTIVITY:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, version));
		break;
	case METRICS_AVERAGE_MEMACTIVITY:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, version));
		break;
	case METRICS_CURR_SOCKETPOWER:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, version)) << 8;
		break;
	case METRICS_TEMPERATURE_HOTSPOT:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version)) *
			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	case METRICS_TEMPERATURE_MEM:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, version)) *
			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	/* This is the max of all VRs and not just SOC VR.
	 * No need to define another data type for the same.
	 */
	case METRICS_TEMPERATURE_VRSOC:
		*value = SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, version)) *
			 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
		break;
	default:
		*value = UINT_MAX;
		break;
	}

	return ret;
}

static int smu_v13_0_6_get_current_clk_freq_by_table(struct smu_context *smu,
						      enum smu_clk_type clk_type,
						      uint32_t *value)
{
	MetricsMember_t member_type;

	if (!value)
		return -EINVAL;

	switch (clk_type) {
	case SMU_GFXCLK:
		member_type = METRICS_CURR_GFXCLK;
		break;
	case SMU_UCLK:
		member_type = METRICS_CURR_UCLK;
		break;
	case SMU_SOCCLK:
		member_type = METRICS_CURR_SOCCLK;
		break;
	case SMU_VCLK:
		member_type = METRICS_CURR_VCLK;
		break;
	case SMU_DCLK:
		member_type = METRICS_CURR_DCLK;
		break;
	case SMU_FCLK:
		member_type = METRICS_CURR_FCLK;
		break;
	default:
		return -EINVAL;
	}

	return smu_v13_0_6_get_smu_metrics_data(smu, member_type, value);
}

static int smu_v13_0_6_print_clks(struct smu_context *smu, char *buf, int size,
				  struct smu_13_0_dpm_table *single_dpm_table,
				  uint32_t curr_clk, const char *clk_name)
{
	struct pp_clock_levels_with_latency clocks;
	int i, ret, level = -1;
	uint32_t clk1, clk2;

	ret = smu_v13_0_6_get_clk_table(smu, &clocks, single_dpm_table);
	if (ret) {
		dev_err(smu->adev->dev, "Attempt to get %s clk levels failed!",
			clk_name);
		return ret;
	}

	if (!clocks.num_levels)
		return -EINVAL;

	if (curr_clk < SMU_13_0_6_DSCLK_THRESHOLD) {
		size += sysfs_emit_at(buf, size, "S: %uMhz *\n", curr_clk);
		for (i = 0; i < clocks.num_levels; i++)
			size += sysfs_emit_at(buf, size, "%d: %uMhz\n", i,
					      clocks.data[i].clocks_in_khz /
						      1000);

	} else {
		if ((clocks.num_levels == 1) ||
		    (curr_clk < (clocks.data[0].clocks_in_khz / 1000)))
			level = 0;
		for (i = 0; i < clocks.num_levels; i++) {
			clk1 = clocks.data[i].clocks_in_khz / 1000;

			if (i < (clocks.num_levels - 1))
				clk2 = clocks.data[i + 1].clocks_in_khz / 1000;

			if (curr_clk == clk1) {
				level = i;
			} else if (curr_clk >= clk1 && curr_clk < clk2) {
				level = (curr_clk - clk1) <= (clk2 - curr_clk) ?
						i :
						i + 1;
			}

			size += sysfs_emit_at(buf, size, "%d: %uMhz %s\n", i,
					      clk1, (level == i) ? "*" : "");
"*" : ""); 1447 } 1448 } 1449 1450 return size; 1451 } 1452 1453 static int smu_v13_0_6_print_clk_levels(struct smu_context *smu, 1454 enum smu_clk_type type, char *buf) 1455 { 1456 int now, size = 0, start_offset = 0; 1457 int ret = 0; 1458 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; 1459 struct smu_13_0_dpm_table *single_dpm_table; 1460 struct smu_dpm_context *smu_dpm = &smu->smu_dpm; 1461 struct smu_13_0_dpm_context *dpm_context = NULL; 1462 uint32_t min_clk, max_clk; 1463 1464 smu_cmn_get_sysfs_buf(&buf, &size); 1465 start_offset = size; 1466 1467 if (amdgpu_ras_intr_triggered()) { 1468 size += sysfs_emit_at(buf, size, "unavailable\n"); 1469 return size - start_offset; 1470 } 1471 1472 dpm_context = smu_dpm->dpm_context; 1473 1474 switch (type) { 1475 case SMU_OD_SCLK: 1476 size += sysfs_emit_at(buf, size, "%s:\n", "OD_SCLK"); 1477 size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", 1478 pstate_table->gfxclk_pstate.curr.min, 1479 pstate_table->gfxclk_pstate.curr.max); 1480 break; 1481 case SMU_SCLK: 1482 ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_GFXCLK, 1483 &now); 1484 if (ret) { 1485 dev_err(smu->adev->dev, 1486 "Attempt to get current gfx clk Failed!"); 1487 return ret; 1488 } 1489 1490 single_dpm_table = &(dpm_context->dpm_tables.gfx_table); 1491 min_clk = single_dpm_table->min; 1492 max_clk = single_dpm_table->max; 1493 1494 if (now < SMU_13_0_6_DSCLK_THRESHOLD) { 1495 size += sysfs_emit_at(buf, size, "S: %uMhz *\n", 1496 now); 1497 size += sysfs_emit_at(buf, size, "0: %uMhz\n", 1498 min_clk); 1499 size += sysfs_emit_at(buf, size, "1: %uMhz\n", 1500 max_clk); 1501 1502 } else if (!smu_v13_0_6_freqs_in_same_level(now, min_clk) && 1503 !smu_v13_0_6_freqs_in_same_level(now, max_clk)) { 1504 size += sysfs_emit_at(buf, size, "0: %uMhz\n", 1505 min_clk); 1506 size += sysfs_emit_at(buf, size, "1: %uMhz *\n", 1507 now); 1508 size += sysfs_emit_at(buf, size, "2: %uMhz\n", 1509 max_clk); 1510 } else { 1511 size += sysfs_emit_at(buf, size, "0: %uMhz %s\n", 1512 min_clk, 1513 smu_v13_0_6_freqs_in_same_level(now, min_clk) ? "*" : ""); 1514 size += sysfs_emit_at(buf, size, "1: %uMhz %s\n", 1515 max_clk, 1516 smu_v13_0_6_freqs_in_same_level(now, max_clk) ? 
"*" : ""); 1517 } 1518 1519 break; 1520 1521 case SMU_OD_MCLK: 1522 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SET_UCLK_MAX))) 1523 return 0; 1524 1525 size += sysfs_emit_at(buf, size, "%s:\n", "OD_MCLK"); 1526 size += sysfs_emit_at(buf, size, "0: %uMhz\n1: %uMhz\n", 1527 pstate_table->uclk_pstate.curr.min, 1528 pstate_table->uclk_pstate.curr.max); 1529 break; 1530 case SMU_MCLK: 1531 ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_UCLK, 1532 &now); 1533 if (ret) { 1534 dev_err(smu->adev->dev, 1535 "Attempt to get current mclk Failed!"); 1536 return ret; 1537 } 1538 1539 single_dpm_table = &(dpm_context->dpm_tables.uclk_table); 1540 1541 ret = smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table, 1542 now, "mclk"); 1543 if (ret < 0) 1544 return ret; 1545 1546 size += ret; 1547 break; 1548 case SMU_SOCCLK: 1549 ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_SOCCLK, 1550 &now); 1551 if (ret) { 1552 dev_err(smu->adev->dev, 1553 "Attempt to get current socclk Failed!"); 1554 return ret; 1555 } 1556 1557 single_dpm_table = &(dpm_context->dpm_tables.soc_table); 1558 1559 ret = smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table, 1560 now, "socclk"); 1561 if (ret < 0) 1562 return ret; 1563 1564 size += ret; 1565 break; 1566 case SMU_FCLK: 1567 ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_FCLK, 1568 &now); 1569 if (ret) { 1570 dev_err(smu->adev->dev, 1571 "Attempt to get current fclk Failed!"); 1572 return ret; 1573 } 1574 1575 single_dpm_table = &(dpm_context->dpm_tables.fclk_table); 1576 1577 ret = smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table, 1578 now, "fclk"); 1579 if (ret < 0) 1580 return ret; 1581 1582 size += ret; 1583 break; 1584 case SMU_VCLK: 1585 ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_VCLK, 1586 &now); 1587 if (ret) { 1588 dev_err(smu->adev->dev, 1589 "Attempt to get current vclk Failed!"); 1590 return ret; 1591 } 1592 1593 single_dpm_table = &(dpm_context->dpm_tables.vclk_table); 1594 1595 ret = smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table, 1596 now, "vclk"); 1597 if (ret < 0) 1598 return ret; 1599 1600 size += ret; 1601 break; 1602 case SMU_DCLK: 1603 ret = smu_v13_0_6_get_current_clk_freq_by_table(smu, SMU_DCLK, 1604 &now); 1605 if (ret) { 1606 dev_err(smu->adev->dev, 1607 "Attempt to get current dclk Failed!"); 1608 return ret; 1609 } 1610 1611 single_dpm_table = &(dpm_context->dpm_tables.dclk_table); 1612 1613 ret = smu_v13_0_6_print_clks(smu, buf, size, single_dpm_table, 1614 now, "dclk"); 1615 if (ret < 0) 1616 return ret; 1617 1618 size += ret; 1619 break; 1620 default: 1621 break; 1622 } 1623 1624 return size - start_offset; 1625 } 1626 1627 static int smu_v13_0_6_upload_dpm_level(struct smu_context *smu, bool max, 1628 uint32_t feature_mask, uint32_t level) 1629 { 1630 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 1631 uint32_t freq; 1632 int ret = 0; 1633 1634 if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT) && 1635 (feature_mask & FEATURE_MASK(FEATURE_DPM_GFXCLK))) { 1636 freq = dpm_context->dpm_tables.gfx_table.dpm_levels[level].value; 1637 ret = smu_cmn_send_smc_msg_with_param( 1638 smu, 1639 (max ? SMU_MSG_SetSoftMaxGfxClk : 1640 SMU_MSG_SetSoftMinGfxclk), 1641 freq & 0xffff, NULL); 1642 if (ret) { 1643 dev_err(smu->adev->dev, 1644 "Failed to set soft %s gfxclk !\n", 1645 max ? 
"max" : "min"); 1646 return ret; 1647 } 1648 } 1649 1650 if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) && 1651 (feature_mask & FEATURE_MASK(FEATURE_DPM_UCLK))) { 1652 freq = dpm_context->dpm_tables.uclk_table.dpm_levels[level] 1653 .value; 1654 ret = smu_cmn_send_smc_msg_with_param( 1655 smu, 1656 (max ? SMU_MSG_SetSoftMaxByFreq : 1657 SMU_MSG_SetSoftMinByFreq), 1658 (PPCLK_UCLK << 16) | (freq & 0xffff), NULL); 1659 if (ret) { 1660 dev_err(smu->adev->dev, 1661 "Failed to set soft %s memclk !\n", 1662 max ? "max" : "min"); 1663 return ret; 1664 } 1665 } 1666 1667 if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_SOCCLK_BIT) && 1668 (feature_mask & FEATURE_MASK(FEATURE_DPM_SOCCLK))) { 1669 freq = dpm_context->dpm_tables.soc_table.dpm_levels[level].value; 1670 ret = smu_cmn_send_smc_msg_with_param( 1671 smu, 1672 (max ? SMU_MSG_SetSoftMaxByFreq : 1673 SMU_MSG_SetSoftMinByFreq), 1674 (PPCLK_SOCCLK << 16) | (freq & 0xffff), NULL); 1675 if (ret) { 1676 dev_err(smu->adev->dev, 1677 "Failed to set soft %s socclk !\n", 1678 max ? "max" : "min"); 1679 return ret; 1680 } 1681 } 1682 1683 return ret; 1684 } 1685 1686 static int smu_v13_0_6_force_clk_levels(struct smu_context *smu, 1687 enum smu_clk_type type, uint32_t mask) 1688 { 1689 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 1690 struct smu_13_0_dpm_table *single_dpm_table = NULL; 1691 uint32_t soft_min_level, soft_max_level; 1692 int ret = 0; 1693 1694 soft_min_level = mask ? (ffs(mask) - 1) : 0; 1695 soft_max_level = mask ? (fls(mask) - 1) : 0; 1696 1697 switch (type) { 1698 case SMU_SCLK: 1699 single_dpm_table = &(dpm_context->dpm_tables.gfx_table); 1700 if (soft_max_level >= single_dpm_table->count) { 1701 dev_err(smu->adev->dev, 1702 "Clock level specified %d is over max allowed %d\n", 1703 soft_max_level, single_dpm_table->count - 1); 1704 ret = -EINVAL; 1705 break; 1706 } 1707 1708 ret = smu_v13_0_6_upload_dpm_level( 1709 smu, false, FEATURE_MASK(FEATURE_DPM_GFXCLK), 1710 soft_min_level); 1711 if (ret) { 1712 dev_err(smu->adev->dev, 1713 "Failed to upload boot level to lowest!\n"); 1714 break; 1715 } 1716 1717 ret = smu_v13_0_6_upload_dpm_level( 1718 smu, true, FEATURE_MASK(FEATURE_DPM_GFXCLK), 1719 soft_max_level); 1720 if (ret) 1721 dev_err(smu->adev->dev, 1722 "Failed to upload dpm max level to highest!\n"); 1723 1724 break; 1725 1726 case SMU_MCLK: 1727 case SMU_SOCCLK: 1728 case SMU_FCLK: 1729 /* 1730 * Should not arrive here since smu_13_0_6 does not 1731 * support mclk/socclk/fclk softmin/softmax settings 1732 */ 1733 ret = -EINVAL; 1734 break; 1735 1736 default: 1737 break; 1738 } 1739 1740 return ret; 1741 } 1742 1743 static int smu_v13_0_6_get_current_activity_percent(struct smu_context *smu, 1744 enum amd_pp_sensors sensor, 1745 uint32_t *value) 1746 { 1747 int ret = 0; 1748 1749 if (!value) 1750 return -EINVAL; 1751 1752 switch (sensor) { 1753 case AMDGPU_PP_SENSOR_GPU_LOAD: 1754 ret = smu_v13_0_6_get_smu_metrics_data( 1755 smu, METRICS_AVERAGE_GFXACTIVITY, value); 1756 break; 1757 case AMDGPU_PP_SENSOR_MEM_LOAD: 1758 ret = smu_v13_0_6_get_smu_metrics_data( 1759 smu, METRICS_AVERAGE_MEMACTIVITY, value); 1760 break; 1761 default: 1762 dev_err(smu->adev->dev, 1763 "Invalid sensor for retrieving clock activity\n"); 1764 return -EINVAL; 1765 } 1766 1767 return ret; 1768 } 1769 1770 static int smu_v13_0_6_thermal_get_temperature(struct smu_context *smu, 1771 enum amd_pp_sensors sensor, 1772 uint32_t *value) 1773 { 1774 int ret = 0; 1775 1776 if (!value) 1777 return -EINVAL; 1778 1779 switch 
(sensor) { 1780 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: 1781 ret = smu_v13_0_6_get_smu_metrics_data( 1782 smu, METRICS_TEMPERATURE_HOTSPOT, value); 1783 break; 1784 case AMDGPU_PP_SENSOR_MEM_TEMP: 1785 ret = smu_v13_0_6_get_smu_metrics_data( 1786 smu, METRICS_TEMPERATURE_MEM, value); 1787 break; 1788 default: 1789 dev_err(smu->adev->dev, "Invalid sensor for retrieving temp\n"); 1790 return -EINVAL; 1791 } 1792 1793 return ret; 1794 } 1795 1796 static int smu_v13_0_6_read_sensor(struct smu_context *smu, 1797 enum amd_pp_sensors sensor, void *data, 1798 uint32_t *size) 1799 { 1800 struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; 1801 int ret = 0; 1802 1803 if (amdgpu_ras_intr_triggered()) 1804 return 0; 1805 1806 if (!data || !size) 1807 return -EINVAL; 1808 1809 switch (sensor) { 1810 case AMDGPU_PP_SENSOR_MEM_LOAD: 1811 case AMDGPU_PP_SENSOR_GPU_LOAD: 1812 ret = smu_v13_0_6_get_current_activity_percent(smu, sensor, 1813 (uint32_t *)data); 1814 *size = 4; 1815 break; 1816 case AMDGPU_PP_SENSOR_GPU_INPUT_POWER: 1817 ret = smu_v13_0_6_get_smu_metrics_data(smu, 1818 METRICS_CURR_SOCKETPOWER, 1819 (uint32_t *)data); 1820 *size = 4; 1821 break; 1822 case AMDGPU_PP_SENSOR_HOTSPOT_TEMP: 1823 case AMDGPU_PP_SENSOR_MEM_TEMP: 1824 ret = smu_v13_0_6_thermal_get_temperature(smu, sensor, 1825 (uint32_t *)data); 1826 *size = 4; 1827 break; 1828 case AMDGPU_PP_SENSOR_GFX_MCLK: 1829 ret = smu_v13_0_6_get_current_clk_freq_by_table( 1830 smu, SMU_UCLK, (uint32_t *)data); 1831 /* the output clock frequency in 10K unit */ 1832 *(uint32_t *)data *= 100; 1833 *size = 4; 1834 break; 1835 case AMDGPU_PP_SENSOR_GFX_SCLK: 1836 ret = smu_v13_0_6_get_current_clk_freq_by_table( 1837 smu, SMU_GFXCLK, (uint32_t *)data); 1838 *(uint32_t *)data *= 100; 1839 *size = 4; 1840 break; 1841 case AMDGPU_PP_SENSOR_VDDGFX: 1842 ret = smu_v13_0_get_gfx_vdd(smu, (uint32_t *)data); 1843 *size = 4; 1844 break; 1845 case AMDGPU_PP_SENSOR_VDDBOARD: 1846 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(BOARD_VOLTAGE))) { 1847 *(uint32_t *)data = dpm_context->board_volt; 1848 *size = 4; 1849 break; 1850 } else { 1851 ret = -EOPNOTSUPP; 1852 break; 1853 } 1854 case AMDGPU_PP_SENSOR_NODEPOWERLIMIT: 1855 case AMDGPU_PP_SENSOR_NODEPOWER: 1856 case AMDGPU_PP_SENSOR_GPPTRESIDENCY: 1857 case AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT: 1858 ret = smu_v13_0_12_get_npm_data(smu, sensor, (uint32_t *)data); 1859 if (ret) 1860 return ret; 1861 *size = 4; 1862 break; 1863 case AMDGPU_PP_SENSOR_GPU_AVG_POWER: 1864 default: 1865 ret = -EOPNOTSUPP; 1866 break; 1867 } 1868 1869 return ret; 1870 } 1871 1872 static int smu_v13_0_6_get_power_limit(struct smu_context *smu, 1873 uint32_t *current_power_limit, 1874 uint32_t *default_power_limit, 1875 uint32_t *max_power_limit, 1876 uint32_t *min_power_limit) 1877 { 1878 struct smu_table_context *smu_table = &smu->smu_table; 1879 struct PPTable_t *pptable = 1880 (struct PPTable_t *)smu_table->driver_pptable; 1881 uint32_t power_limit = 0; 1882 int ret; 1883 1884 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetPptLimit, &power_limit); 1885 1886 if (ret) { 1887 dev_err(smu->adev->dev, "Couldn't get PPT limit"); 1888 return -EINVAL; 1889 } 1890 1891 if (current_power_limit) 1892 *current_power_limit = power_limit; 1893 if (default_power_limit) 1894 *default_power_limit = pptable->MaxSocketPowerLimit; 1895 1896 if (max_power_limit) { 1897 *max_power_limit = pptable->MaxSocketPowerLimit; 1898 } 1899 1900 if (min_power_limit) 1901 *min_power_limit = 0; 1902 return 0; 1903 } 1904 1905 static int 
smu_v13_0_6_set_power_limit(struct smu_context *smu, 1906 enum smu_ppt_limit_type limit_type, 1907 uint32_t limit) 1908 { 1909 struct smu_table_context *smu_table = &smu->smu_table; 1910 struct PPTable_t *pptable = 1911 (struct PPTable_t *)smu_table->driver_pptable; 1912 int ret; 1913 1914 if (limit_type == SMU_FAST_PPT_LIMIT) { 1915 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT))) 1916 return -EOPNOTSUPP; 1917 if (limit > pptable->PPT1Max || limit < pptable->PPT1Min) { 1918 dev_err(smu->adev->dev, 1919 "New power limit (%d) should be between min %d max %d\n", 1920 limit, pptable->PPT1Min, pptable->PPT1Max); 1921 return -EINVAL; 1922 } 1923 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetFastPptLimit, 1924 limit, NULL); 1925 if (ret) 1926 dev_err(smu->adev->dev, "Set fast PPT limit failed!\n"); 1927 return ret; 1928 } 1929 1930 return smu_v13_0_set_power_limit(smu, limit_type, limit); 1931 } 1932 1933 static int smu_v13_0_6_get_ppt_limit(struct smu_context *smu, 1934 uint32_t *ppt_limit, 1935 enum smu_ppt_limit_type type, 1936 enum smu_ppt_limit_level level) 1937 { 1938 struct smu_table_context *smu_table = &smu->smu_table; 1939 struct PPTable_t *pptable = 1940 (struct PPTable_t *)smu_table->driver_pptable; 1941 int ret = 0; 1942 1943 if (type == SMU_FAST_PPT_LIMIT) { 1944 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(FAST_PPT))) 1945 return -EOPNOTSUPP; 1946 switch (level) { 1947 case SMU_PPT_LIMIT_MAX: 1948 *ppt_limit = pptable->PPT1Max; 1949 break; 1950 case SMU_PPT_LIMIT_CURRENT: 1951 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetFastPptLimit, ppt_limit); 1952 if (ret) 1953 dev_err(smu->adev->dev, "Get fast PPT limit failed!\n"); 1954 break; 1955 case SMU_PPT_LIMIT_DEFAULT: 1956 *ppt_limit = pptable->PPT1Default; 1957 break; 1958 case SMU_PPT_LIMIT_MIN: 1959 *ppt_limit = pptable->PPT1Min; 1960 break; 1961 default: 1962 return -EOPNOTSUPP; 1963 } 1964 return ret; 1965 } 1966 return -EOPNOTSUPP; 1967 } 1968 1969 static int smu_v13_0_6_irq_process(struct amdgpu_device *adev, 1970 struct amdgpu_irq_src *source, 1971 struct amdgpu_iv_entry *entry) 1972 { 1973 struct smu_context *smu = adev->powerplay.pp_handle; 1974 struct smu_power_context *smu_power = &smu->smu_power; 1975 struct smu_13_0_power_context *power_context = smu_power->power_context; 1976 uint32_t client_id = entry->client_id; 1977 uint32_t ctxid = entry->src_data[0]; 1978 uint32_t src_id = entry->src_id; 1979 uint32_t data; 1980 1981 if (client_id == SOC15_IH_CLIENTID_MP1) { 1982 if (src_id == IH_INTERRUPT_ID_TO_DRIVER) { 1983 /* ACK SMUToHost interrupt */ 1984 data = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL); 1985 data = REG_SET_FIELD(data, MP1_SMN_IH_SW_INT_CTRL, INT_ACK, 1); 1986 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, data); 1987 /* 1988 * ctxid is used to distinguish different events for SMCToHost 1989 * interrupt. 1990 */ 1991 switch (ctxid) { 1992 case IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING: 1993 /* 1994 * Increment the throttle interrupt counter 1995 */ 1996 atomic64_inc(&smu->throttle_int_counter); 1997 1998 if (!atomic_read(&adev->throttling_logging_enabled)) 1999 return 0; 2000 2001 /* This uses the new method which fixes the 2002 * incorrect throttling status reporting 2003 * through metrics table. For older FWs, 2004 * it will be ignored. 
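* The throttle status delivered with the interrupt payload (src_data[1]) is cached in power_context->throttle_status and reported by the ratelimited throttling_logging_work scheduled below.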
2005 */ 2006 if (__ratelimit(&adev->throttling_logging_rs)) { 2007 atomic_set( 2008 &power_context->throttle_status, 2009 entry->src_data[1]); 2010 schedule_work(&smu->throttling_logging_work); 2011 } 2012 break; 2013 default: 2014 dev_dbg(adev->dev, "Unhandled context id %d from client:%d!\n", 2015 ctxid, client_id); 2016 break; 2017 } 2018 } 2019 } 2020 2021 return 0; 2022 } 2023 2024 static int smu_v13_0_6_set_irq_state(struct amdgpu_device *adev, 2025 struct amdgpu_irq_src *source, 2026 unsigned type, 2027 enum amdgpu_interrupt_state state) 2028 { 2029 uint32_t val = 0; 2030 2031 switch (state) { 2032 case AMDGPU_IRQ_STATE_DISABLE: 2033 /* For MP1 SW irqs */ 2034 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL); 2035 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 1); 2036 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, val); 2037 2038 break; 2039 case AMDGPU_IRQ_STATE_ENABLE: 2040 /* For MP1 SW irqs */ 2041 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT); 2042 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, ID, 0xFE); 2043 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT, VALID, 0); 2044 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT, val); 2045 2046 val = RREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL); 2047 val = REG_SET_FIELD(val, MP1_SMN_IH_SW_INT_CTRL, INT_MASK, 0); 2048 WREG32_SOC15(MP1, 0, regMP1_SMN_IH_SW_INT_CTRL, val); 2049 2050 break; 2051 default: 2052 break; 2053 } 2054 2055 return 0; 2056 } 2057 2058 static const struct amdgpu_irq_src_funcs smu_v13_0_6_irq_funcs = { 2059 .set = smu_v13_0_6_set_irq_state, 2060 .process = smu_v13_0_6_irq_process, 2061 }; 2062 2063 static int smu_v13_0_6_register_irq_handler(struct smu_context *smu) 2064 { 2065 struct amdgpu_device *adev = smu->adev; 2066 struct amdgpu_irq_src *irq_src = &smu->irq_source; 2067 int ret = 0; 2068 2069 if (amdgpu_sriov_vf(adev)) 2070 return 0; 2071 2072 irq_src->num_types = 1; 2073 irq_src->funcs = &smu_v13_0_6_irq_funcs; 2074 2075 ret = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_MP1, 2076 IH_INTERRUPT_ID_TO_DRIVER, 2077 irq_src); 2078 if (ret) 2079 return ret; 2080 2081 return ret; 2082 } 2083 2084 static int smu_v13_0_6_notify_unload(struct smu_context *smu) 2085 { 2086 if (amdgpu_in_reset(smu->adev)) 2087 return 0; 2088 2089 dev_dbg(smu->adev->dev, "Notify PMFW about driver unload"); 2090 /* Ignore return, just inform FW that the driver is not going to be there */ 2091 smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL); 2092 2093 return 0; 2094 } 2095 2096 static int smu_v13_0_6_mca_set_debug_mode(struct smu_context *smu, bool enable) 2097 { 2098 /* NOTE: this ClearMcaOnRead message is only supported for smu version 85.72.0 or higher */ 2099 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(MCA_DEBUG_MODE))) 2100 return 0; 2101 2102 return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ClearMcaOnRead, 2103 enable ?
0 : ClearMcaOnRead_UE_FLAG_MASK | ClearMcaOnRead_CE_POLL_MASK, 2104 NULL); 2105 } 2106 2107 static int smu_v13_0_6_system_features_control(struct smu_context *smu, 2108 bool enable) 2109 { 2110 struct amdgpu_device *adev = smu->adev; 2111 int ret = 0; 2112 2113 if (amdgpu_sriov_vf(adev)) 2114 return 0; 2115 2116 if (enable) { 2117 if (!(adev->flags & AMD_IS_APU)) 2118 ret = smu_v13_0_system_features_control(smu, enable); 2119 } else { 2120 /* Notify FW that the device is no longer driver managed */ 2121 smu_v13_0_6_notify_unload(smu); 2122 } 2123 2124 return ret; 2125 } 2126 2127 static int smu_v13_0_6_set_gfx_soft_freq_limited_range(struct smu_context *smu, 2128 uint32_t min, 2129 uint32_t max) 2130 { 2131 int ret; 2132 2133 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk, 2134 max & 0xffff, NULL); 2135 if (ret) 2136 return ret; 2137 2138 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMinGfxclk, 2139 min & 0xffff, NULL); 2140 2141 return ret; 2142 } 2143 2144 static int smu_v13_0_6_set_performance_level(struct smu_context *smu, 2145 enum amd_dpm_forced_level level) 2146 { 2147 struct smu_dpm_context *smu_dpm = &(smu->smu_dpm); 2148 struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context; 2149 struct smu_13_0_dpm_table *gfx_table = 2150 &dpm_context->dpm_tables.gfx_table; 2151 struct smu_13_0_dpm_table *uclk_table = 2152 &dpm_context->dpm_tables.uclk_table; 2153 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; 2154 int ret; 2155 2156 /* Disable determinism if switching to another mode */ 2157 if ((smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) && 2158 (level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)) { 2159 smu_cmn_send_smc_msg(smu, SMU_MSG_DisableDeterminism, NULL); 2160 pstate_table->gfxclk_pstate.curr.max = gfx_table->max; 2161 } 2162 2163 switch (level) { 2164 case AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM: 2165 return 0; 2166 2167 case AMD_DPM_FORCED_LEVEL_AUTO: 2168 if ((gfx_table->min != pstate_table->gfxclk_pstate.curr.min) || 2169 (gfx_table->max != pstate_table->gfxclk_pstate.curr.max)) { 2170 ret = smu_v13_0_6_set_gfx_soft_freq_limited_range( 2171 smu, gfx_table->min, gfx_table->max); 2172 if (ret) 2173 return ret; 2174 2175 pstate_table->gfxclk_pstate.curr.min = gfx_table->min; 2176 pstate_table->gfxclk_pstate.curr.max = gfx_table->max; 2177 } 2178 2179 if (uclk_table->max != pstate_table->uclk_pstate.curr.max) { 2180 /* Min UCLK is not expected to be changed */ 2181 ret = smu_v13_0_set_soft_freq_limited_range( 2182 smu, SMU_UCLK, 0, uclk_table->max, false); 2183 if (ret) 2184 return ret; 2185 pstate_table->uclk_pstate.curr.max = uclk_table->max; 2186 } 2187 smu_v13_0_reset_custom_level(smu); 2188 2189 return 0; 2190 case AMD_DPM_FORCED_LEVEL_MANUAL: 2191 return 0; 2192 default: 2193 break; 2194 } 2195 2196 return -EOPNOTSUPP; 2197 } 2198 2199 static int smu_v13_0_6_set_soft_freq_limited_range(struct smu_context *smu, 2200 enum smu_clk_type clk_type, 2201 uint32_t min, uint32_t max, 2202 bool automatic) 2203 { 2204 struct smu_dpm_context *smu_dpm = &(smu->smu_dpm); 2205 struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context; 2206 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; 2207 struct amdgpu_device *adev = smu->adev; 2208 uint32_t min_clk; 2209 uint32_t max_clk; 2210 int ret = 0; 2211 2212 if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK && 2213 clk_type != SMU_UCLK) 2214 return -EINVAL; 2215 2216 if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) && 2217 (smu_dpm->dpm_level != 
AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)) 2218 return -EINVAL; 2219 2220 if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_MANUAL) { 2221 if (min >= max) { 2222 dev_err(smu->adev->dev, 2223 "Minimum clk should be less than the maximum allowed clock\n"); 2224 return -EINVAL; 2225 } 2226 2227 if (clk_type == SMU_GFXCLK) { 2228 if ((min == pstate_table->gfxclk_pstate.curr.min) && 2229 (max == pstate_table->gfxclk_pstate.curr.max)) 2230 return 0; 2231 2232 ret = smu_v13_0_6_set_gfx_soft_freq_limited_range( 2233 smu, min, max); 2234 if (!ret) { 2235 pstate_table->gfxclk_pstate.curr.min = min; 2236 pstate_table->gfxclk_pstate.curr.max = max; 2237 } 2238 } 2239 2240 if (clk_type == SMU_UCLK) { 2241 if (max == pstate_table->uclk_pstate.curr.max) 2242 return 0; 2243 /* For VF, only allowed in FW versions 85.102 or greater */ 2244 if (!smu_v13_0_6_cap_supported(smu, 2245 SMU_CAP(SET_UCLK_MAX))) 2246 return -EOPNOTSUPP; 2247 /* Only max clock limiting is allowed for UCLK */ 2248 ret = smu_v13_0_set_soft_freq_limited_range( 2249 smu, SMU_UCLK, 0, max, false); 2250 if (!ret) 2251 pstate_table->uclk_pstate.curr.max = max; 2252 } 2253 2254 return ret; 2255 } 2256 2257 if (smu_dpm->dpm_level == AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { 2258 if (!max || (max < dpm_context->dpm_tables.gfx_table.min) || 2259 (max > dpm_context->dpm_tables.gfx_table.max)) { 2260 dev_warn( 2261 adev->dev, 2262 "Invalid max frequency %d MHz specified for determinism\n", 2263 max); 2264 return -EINVAL; 2265 } 2266 2267 /* Restore default min/max clocks and enable determinism */ 2268 min_clk = dpm_context->dpm_tables.gfx_table.min; 2269 max_clk = dpm_context->dpm_tables.gfx_table.max; 2270 ret = smu_v13_0_6_set_gfx_soft_freq_limited_range(smu, min_clk, 2271 max_clk); 2272 if (!ret) { 2273 usleep_range(500, 1000); 2274 ret = smu_cmn_send_smc_msg_with_param( 2275 smu, SMU_MSG_EnableDeterminism, max, NULL); 2276 if (ret) { 2277 dev_err(adev->dev, 2278 "Failed to enable determinism at GFX clock %d MHz\n", 2279 max); 2280 } else { 2281 pstate_table->gfxclk_pstate.curr.min = min_clk; 2282 pstate_table->gfxclk_pstate.curr.max = max; 2283 } 2284 } 2285 } 2286 2287 return ret; 2288 } 2289 2290 static int smu_v13_0_6_usr_edit_dpm_table(struct smu_context *smu, 2291 enum PP_OD_DPM_TABLE_COMMAND type, 2292 long input[], uint32_t size) 2293 { 2294 struct smu_dpm_context *smu_dpm = &(smu->smu_dpm); 2295 struct smu_13_0_dpm_context *dpm_context = smu_dpm->dpm_context; 2296 struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; 2297 uint32_t min_clk; 2298 uint32_t max_clk; 2299 int ret = 0; 2300 2301 /* Only allowed in manual or determinism mode */ 2302 if ((smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) && 2303 (smu_dpm->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)) 2304 return -EINVAL; 2305 2306 switch (type) { 2307 case PP_OD_EDIT_SCLK_VDDC_TABLE: 2308 if (size != 2) { 2309 dev_err(smu->adev->dev, 2310 "Input parameter number not correct\n"); 2311 return -EINVAL; 2312 } 2313 2314 if (input[0] == 0) { 2315 if (input[1] < dpm_context->dpm_tables.gfx_table.min) { 2316 dev_warn( 2317 smu->adev->dev, 2318 "Minimum GFX clk (%ld) MHz specified is less than the minimum allowed (%d) MHz\n", 2319 input[1], 2320 dpm_context->dpm_tables.gfx_table.min); 2321 pstate_table->gfxclk_pstate.custom.min = 2322 pstate_table->gfxclk_pstate.curr.min; 2323 return -EINVAL; 2324 } 2325 2326 pstate_table->gfxclk_pstate.custom.min = input[1]; 2327 } else if (input[0] == 1) { 2328 if (input[1] > dpm_context->dpm_tables.gfx_table.max) { 2329 dev_warn( 2330 
smu->adev->dev, 2331 "Maximum GFX clk (%ld) MHz specified is greater than the maximum allowed (%d) MHz\n", 2332 input[1], 2333 dpm_context->dpm_tables.gfx_table.max); 2334 pstate_table->gfxclk_pstate.custom.max = 2335 pstate_table->gfxclk_pstate.curr.max; 2336 return -EINVAL; 2337 } 2338 2339 pstate_table->gfxclk_pstate.custom.max = input[1]; 2340 } else { 2341 return -EINVAL; 2342 } 2343 break; 2344 case PP_OD_EDIT_MCLK_VDDC_TABLE: 2345 if (size != 2) { 2346 dev_err(smu->adev->dev, 2347 "Input parameter number not correct\n"); 2348 return -EINVAL; 2349 } 2350 2351 if (!smu_cmn_feature_is_enabled(smu, 2352 SMU_FEATURE_DPM_UCLK_BIT)) { 2353 dev_warn(smu->adev->dev, 2354 "UCLK_LIMITS setting not supported!\n"); 2355 return -EOPNOTSUPP; 2356 } 2357 2358 if (input[0] == 0) { 2359 dev_info(smu->adev->dev, 2360 "Setting min UCLK level is not supported"); 2361 return -EINVAL; 2362 } else if (input[0] == 1) { 2363 if (input[1] > dpm_context->dpm_tables.uclk_table.max) { 2364 dev_warn( 2365 smu->adev->dev, 2366 "Maximum UCLK (%ld) MHz specified is greater than the maximum allowed (%d) MHz\n", 2367 input[1], 2368 dpm_context->dpm_tables.uclk_table.max); 2369 pstate_table->uclk_pstate.custom.max = 2370 pstate_table->uclk_pstate.curr.max; 2371 return -EINVAL; 2372 } 2373 2374 pstate_table->uclk_pstate.custom.max = input[1]; 2375 } 2376 break; 2377 2378 case PP_OD_RESTORE_DEFAULT_TABLE: 2379 if (size != 0) { 2380 dev_err(smu->adev->dev, 2381 "Input parameter number not correct\n"); 2382 return -EINVAL; 2383 } else { 2384 /* Use the default frequencies for manual and determinism mode */ 2385 min_clk = dpm_context->dpm_tables.gfx_table.min; 2386 max_clk = dpm_context->dpm_tables.gfx_table.max; 2387 2388 ret = smu_v13_0_6_set_soft_freq_limited_range( 2389 smu, SMU_GFXCLK, min_clk, max_clk, false); 2390 2391 if (ret) 2392 return ret; 2393 2394 min_clk = dpm_context->dpm_tables.uclk_table.min; 2395 max_clk = dpm_context->dpm_tables.uclk_table.max; 2396 ret = smu_v13_0_6_set_soft_freq_limited_range( 2397 smu, SMU_UCLK, min_clk, max_clk, false); 2398 if (ret) 2399 return ret; 2400 smu_v13_0_reset_custom_level(smu); 2401 } 2402 break; 2403 case PP_OD_COMMIT_DPM_TABLE: 2404 if (size != 0) { 2405 dev_err(smu->adev->dev, 2406 "Input parameter number not correct\n"); 2407 return -EINVAL; 2408 } else { 2409 if (!pstate_table->gfxclk_pstate.custom.min) 2410 pstate_table->gfxclk_pstate.custom.min = 2411 pstate_table->gfxclk_pstate.curr.min; 2412 2413 if (!pstate_table->gfxclk_pstate.custom.max) 2414 pstate_table->gfxclk_pstate.custom.max = 2415 pstate_table->gfxclk_pstate.curr.max; 2416 2417 min_clk = pstate_table->gfxclk_pstate.custom.min; 2418 max_clk = pstate_table->gfxclk_pstate.custom.max; 2419 2420 ret = smu_v13_0_6_set_soft_freq_limited_range( 2421 smu, SMU_GFXCLK, min_clk, max_clk, false); 2422 2423 if (ret) 2424 return ret; 2425 2426 if (!pstate_table->uclk_pstate.custom.max) 2427 return 0; 2428 2429 min_clk = pstate_table->uclk_pstate.curr.min; 2430 max_clk = pstate_table->uclk_pstate.custom.max; 2431 return smu_v13_0_6_set_soft_freq_limited_range( 2432 smu, SMU_UCLK, min_clk, max_clk, false); 2433 } 2434 break; 2435 default: 2436 return -ENOSYS; 2437 } 2438 2439 return ret; 2440 } 2441 2442 static int smu_v13_0_6_get_enabled_mask(struct smu_context *smu, 2443 uint64_t *feature_mask) 2444 { 2445 int ret; 2446 2447 ret = smu_cmn_get_enabled_mask(smu, feature_mask); 2448 2449 if (ret == -EIO && !smu_v13_0_6_cap_supported(smu, SMU_CAP(DPM))) { 2450 *feature_mask = 0; 2451 ret = 0; 2452 } 2453 2454 return ret; 
2455 } 2456 2457 static bool smu_v13_0_6_is_dpm_running(struct smu_context *smu) 2458 { 2459 int ret; 2460 uint64_t feature_enabled; 2461 2462 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) 2463 return smu_v13_0_12_is_dpm_running(smu); 2464 2465 ret = smu_v13_0_6_get_enabled_mask(smu, &feature_enabled); 2466 2467 if (ret) 2468 return false; 2469 2470 return !!(feature_enabled & SMC_DPM_FEATURE); 2471 } 2472 2473 static int smu_v13_0_6_request_i2c_xfer(struct smu_context *smu, 2474 void *table_data) 2475 { 2476 struct smu_table_context *smu_table = &smu->smu_table; 2477 struct smu_table *table = &smu_table->driver_table; 2478 struct amdgpu_device *adev = smu->adev; 2479 uint32_t table_size; 2480 int ret = 0; 2481 2482 if (!table_data) 2483 return -EINVAL; 2484 2485 table_size = smu_table->tables[SMU_TABLE_I2C_COMMANDS].size; 2486 2487 memcpy(table->cpu_addr, table_data, table_size); 2488 /* Flush hdp cache */ 2489 amdgpu_hdp_flush(adev, NULL); 2490 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RequestI2cTransaction, 2491 NULL); 2492 2493 return ret; 2494 } 2495 2496 static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap, 2497 struct i2c_msg *msg, int num_msgs) 2498 { 2499 struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c_adap); 2500 struct amdgpu_device *adev = smu_i2c->adev; 2501 struct smu_context *smu = adev->powerplay.pp_handle; 2502 struct smu_table_context *smu_table = &smu->smu_table; 2503 struct smu_table *table = &smu_table->driver_table; 2504 SwI2cRequest_t *req, *res = (SwI2cRequest_t *)table->cpu_addr; 2505 int i, j, r, c; 2506 u16 dir; 2507 2508 if (!adev->pm.dpm_enabled) 2509 return -EBUSY; 2510 2511 req = kzalloc(sizeof(*req), GFP_KERNEL); 2512 if (!req) 2513 return -ENOMEM; 2514 2515 req->I2CcontrollerPort = smu_i2c->port; 2516 req->I2CSpeed = I2C_SPEED_FAST_400K; 2517 req->SlaveAddress = msg[0].addr << 1; /* wants an 8-bit address */ 2518 dir = msg[0].flags & I2C_M_RD; 2519 2520 for (c = i = 0; i < num_msgs; i++) { 2521 for (j = 0; j < msg[i].len; j++, c++) { 2522 SwI2cCmd_t *cmd = &req->SwI2cCmds[c]; 2523 2524 if (!(msg[i].flags & I2C_M_RD)) { 2525 /* write */ 2526 cmd->CmdConfig |= CMDCONFIG_READWRITE_MASK; 2527 cmd->ReadWriteData = msg[i].buf[j]; 2528 } 2529 2530 if ((dir ^ msg[i].flags) & I2C_M_RD) { 2531 /* The direction changes. 2532 */ 2533 dir = msg[i].flags & I2C_M_RD; 2534 cmd->CmdConfig |= CMDCONFIG_RESTART_MASK; 2535 } 2536 2537 req->NumCmds++; 2538 2539 /* 2540 * Insert STOP if we are at the last byte of either last 2541 * message for the transaction or the client explicitly 2542 * requires a STOP at this particular message. 
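* Marking STOP on a command also clears any RESTART previously set on it.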
2543 */ 2544 if ((j == msg[i].len - 1) && 2545 ((i == num_msgs - 1) || (msg[i].flags & I2C_M_STOP))) { 2546 cmd->CmdConfig &= ~CMDCONFIG_RESTART_MASK; 2547 cmd->CmdConfig |= CMDCONFIG_STOP_MASK; 2548 } 2549 } 2550 } 2551 mutex_lock(&adev->pm.mutex); 2552 r = smu_v13_0_6_request_i2c_xfer(smu, req); 2553 if (r) { 2554 /* Retry once, in case of an i2c collision */ 2555 r = smu_v13_0_6_request_i2c_xfer(smu, req); 2556 if (r) 2557 goto fail; 2558 } 2559 2560 for (c = i = 0; i < num_msgs; i++) { 2561 if (!(msg[i].flags & I2C_M_RD)) { 2562 c += msg[i].len; 2563 continue; 2564 } 2565 for (j = 0; j < msg[i].len; j++, c++) { 2566 SwI2cCmd_t *cmd = &res->SwI2cCmds[c]; 2567 2568 msg[i].buf[j] = cmd->ReadWriteData; 2569 } 2570 } 2571 r = num_msgs; 2572 fail: 2573 mutex_unlock(&adev->pm.mutex); 2574 kfree(req); 2575 return r; 2576 } 2577 2578 static u32 smu_v13_0_6_i2c_func(struct i2c_adapter *adap) 2579 { 2580 return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; 2581 } 2582 2583 static const struct i2c_algorithm smu_v13_0_6_i2c_algo = { 2584 .master_xfer = smu_v13_0_6_i2c_xfer, 2585 .functionality = smu_v13_0_6_i2c_func, 2586 }; 2587 2588 static const struct i2c_adapter_quirks smu_v13_0_6_i2c_control_quirks = { 2589 .flags = I2C_AQ_COMB | I2C_AQ_COMB_SAME_ADDR | I2C_AQ_NO_ZERO_LEN, 2590 .max_read_len = MAX_SW_I2C_COMMANDS, 2591 .max_write_len = MAX_SW_I2C_COMMANDS, 2592 .max_comb_1st_msg_len = 2, 2593 .max_comb_2nd_msg_len = MAX_SW_I2C_COMMANDS - 2, 2594 }; 2595 2596 static int smu_v13_0_6_i2c_control_init(struct smu_context *smu) 2597 { 2598 struct amdgpu_device *adev = smu->adev; 2599 int res, i; 2600 2601 for (i = 0; i < MAX_SMU_I2C_BUSES; i++) { 2602 struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[i]; 2603 struct i2c_adapter *control = &smu_i2c->adapter; 2604 2605 smu_i2c->adev = adev; 2606 smu_i2c->port = i; 2607 mutex_init(&smu_i2c->mutex); 2608 control->owner = THIS_MODULE; 2609 control->dev.parent = &adev->pdev->dev; 2610 control->algo = &smu_v13_0_6_i2c_algo; 2611 snprintf(control->name, sizeof(control->name), "AMDGPU SMU %d", i); 2612 control->quirks = &smu_v13_0_6_i2c_control_quirks; 2613 i2c_set_adapdata(control, smu_i2c); 2614 2615 res = devm_i2c_add_adapter(adev->dev, control); 2616 if (res) { 2617 DRM_ERROR("Failed to register hw i2c, err: %d\n", res); 2618 return res; 2619 } 2620 } 2621 2622 adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; 2623 adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; 2624 2625 return 0; 2626 } 2627 2628 static void smu_v13_0_6_i2c_control_fini(struct smu_context *smu) 2629 { 2630 struct amdgpu_device *adev = smu->adev; 2631 2632 adev->pm.ras_eeprom_i2c_bus = NULL; 2633 adev->pm.fru_eeprom_i2c_bus = NULL; 2634 } 2635 2636 static void smu_v13_0_6_get_unique_id(struct smu_context *smu) 2637 { 2638 struct amdgpu_device *adev = smu->adev; 2639 struct smu_table_context *smu_table = &smu->smu_table; 2640 struct PPTable_t *pptable = 2641 (struct PPTable_t *)smu_table->driver_pptable; 2642 2643 adev->unique_id = pptable->PublicSerialNumber_AID; 2644 } 2645 2646 static int smu_v13_0_6_get_bamaco_support(struct smu_context *smu) 2647 { 2648 /* smu_13_0_6 does not support baco */ 2649 2650 return 0; 2651 } 2652 2653 static const char *const throttling_logging_label[] = { 2654 [THROTTLER_PROCHOT_BIT] = "Prochot", 2655 [THROTTLER_PPT_BIT] = "PPT", 2656 [THROTTLER_THERMAL_SOCKET_BIT] = "SOC", 2657 [THROTTLER_THERMAL_VR_BIT] = "VR", 2658 [THROTTLER_THERMAL_HBM_BIT] = "HBM" 2659 }; 2660 2661 static void smu_v13_0_6_log_thermal_throttling_event(struct 
smu_context *smu) 2662 { 2663 int throttler_idx, throttling_events = 0, buf_idx = 0; 2664 struct amdgpu_device *adev = smu->adev; 2665 uint32_t throttler_status; 2666 char log_buf[256]; 2667 2668 throttler_status = smu_v13_0_6_get_throttler_status(smu); 2669 if (!throttler_status) 2670 return; 2671 2672 memset(log_buf, 0, sizeof(log_buf)); 2673 for (throttler_idx = 0; 2674 throttler_idx < ARRAY_SIZE(throttling_logging_label); 2675 throttler_idx++) { 2676 if (throttler_status & (1U << throttler_idx)) { 2677 throttling_events++; 2678 buf_idx += snprintf( 2679 log_buf + buf_idx, sizeof(log_buf) - buf_idx, 2680 "%s%s", throttling_events > 1 ? " and " : "", 2681 throttling_logging_label[throttler_idx]); 2682 if (buf_idx >= sizeof(log_buf)) { 2683 dev_err(adev->dev, "buffer overflow!\n"); 2684 log_buf[sizeof(log_buf) - 1] = '\0'; 2685 break; 2686 } 2687 } 2688 } 2689 2690 dev_warn(adev->dev, 2691 "WARN: GPU is throttled, expect performance decrease. %s.\n", 2692 log_buf); 2693 kgd2kfd_smi_event_throttle( 2694 smu->adev->kfd.dev, 2695 smu_cmn_get_indep_throttler_status(throttler_status, 2696 smu_v13_0_6_throttler_map)); 2697 } 2698 2699 static int 2700 smu_v13_0_6_get_current_pcie_link_width_level(struct smu_context *smu) 2701 { 2702 struct amdgpu_device *adev = smu->adev; 2703 2704 return REG_GET_FIELD(RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL), 2705 PCIE_LC_LINK_WIDTH_CNTL, LC_LINK_WIDTH_RD); 2706 } 2707 2708 static int smu_v13_0_6_get_current_pcie_link_speed(struct smu_context *smu) 2709 { 2710 struct amdgpu_device *adev = smu->adev; 2711 uint32_t speed_level; 2712 uint32_t esm_ctrl; 2713 2714 /* TODO: confirm this on real target */ 2715 esm_ctrl = RREG32_PCIE(smnPCIE_ESM_CTRL); 2716 if ((esm_ctrl >> 15) & 0x1) 2717 return (((esm_ctrl >> 8) & 0x7F) + 128); 2718 2719 speed_level = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & 2720 PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) 2721 >> PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; 2722 if (speed_level > LINK_SPEED_MAX) 2723 speed_level = 0; 2724 2725 return pcie_gen_to_speed(speed_level + 1); 2726 } 2727 2728 static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id, 2729 void *table) 2730 { 2731 const u8 num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3; 2732 int version = smu_v13_0_6_get_metrics_version(smu); 2733 struct smu_v13_0_6_partition_metrics *xcp_metrics; 2734 MetricsTableV0_t *metrics_v0 __free(kfree) = NULL; 2735 struct amdgpu_device *adev = smu->adev; 2736 int ret, inst, i, j, k, idx; 2737 MetricsTableV1_t *metrics_v1; 2738 MetricsTableV2_t *metrics_v2; 2739 struct amdgpu_xcp *xcp; 2740 u32 inst_mask; 2741 bool per_inst; 2742 2743 if (!table) 2744 return sizeof(*xcp_metrics); 2745 2746 for_each_xcp(adev->xcp_mgr, xcp, i) { 2747 if (xcp->id == xcp_id) 2748 break; 2749 } 2750 if (i == adev->xcp_mgr->num_xcps) 2751 return -EINVAL; 2752 2753 xcp_metrics = (struct smu_v13_0_6_partition_metrics *)table; 2754 smu_v13_0_6_partition_metrics_init(xcp_metrics, 1, 1); 2755 2756 metrics_v0 = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); 2757 if (!metrics_v0) 2758 return -ENOMEM; 2759 2760 ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false); 2761 if (ret) 2762 return ret; 2763 2764 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == 2765 IP_VERSION(13, 0, 12) && 2766 smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) 2767 return smu_v13_0_12_get_xcp_metrics(smu, xcp, table, 2768 metrics_v0); 2769 2770 metrics_v1 = (MetricsTableV1_t *)metrics_v0; 2771 metrics_v2 = (MetricsTableV2_t *)metrics_v0; 2772 2773 per_inst = 
smu_v13_0_6_cap_supported(smu, SMU_CAP(PER_INST_METRICS)); 2774 2775 amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask); 2776 idx = 0; 2777 for_each_inst(k, inst_mask) { 2778 /* Both JPEG and VCN has same instances */ 2779 inst = GET_INST(VCN, k); 2780 2781 for (j = 0; j < num_jpeg_rings; ++j) { 2782 xcp_metrics->jpeg_busy[(idx * num_jpeg_rings) + j] = 2783 SMUQ10_ROUND(GET_METRIC_FIELD( 2784 JpegBusy, 2785 version)[(inst * num_jpeg_rings) + j]); 2786 } 2787 xcp_metrics->vcn_busy[idx] = 2788 SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, version)[inst]); 2789 2790 xcp_metrics->current_vclk0[idx] = SMUQ10_ROUND( 2791 GET_METRIC_FIELD(VclkFrequency, version)[inst]); 2792 xcp_metrics->current_dclk0[idx] = SMUQ10_ROUND( 2793 GET_METRIC_FIELD(DclkFrequency, version)[inst]); 2794 xcp_metrics->current_socclk[idx] = SMUQ10_ROUND( 2795 GET_METRIC_FIELD(SocclkFrequency, version)[inst]); 2796 2797 idx++; 2798 } 2799 2800 xcp_metrics->current_uclk = 2801 SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version)); 2802 2803 if (per_inst) { 2804 amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask); 2805 idx = 0; 2806 for_each_inst(k, inst_mask) { 2807 inst = GET_INST(GC, k); 2808 xcp_metrics->current_gfxclk[idx] = 2809 SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, 2810 version)[inst]); 2811 2812 xcp_metrics->gfx_busy_inst[idx] = SMUQ10_ROUND( 2813 GET_GPU_METRIC_FIELD(GfxBusy, version)[inst]); 2814 xcp_metrics->gfx_busy_acc[idx] = SMUQ10_ROUND( 2815 GET_GPU_METRIC_FIELD(GfxBusyAcc, 2816 version)[inst]); 2817 if (smu_v13_0_6_cap_supported( 2818 smu, SMU_CAP(HST_LIMIT_METRICS))) { 2819 xcp_metrics->gfx_below_host_limit_ppt_acc 2820 [idx] = SMUQ10_ROUND( 2821 metrics_v0->GfxclkBelowHostLimitPptAcc 2822 [inst]); 2823 xcp_metrics->gfx_below_host_limit_thm_acc 2824 [idx] = SMUQ10_ROUND( 2825 metrics_v0->GfxclkBelowHostLimitThmAcc 2826 [inst]); 2827 xcp_metrics->gfx_low_utilization_acc 2828 [idx] = SMUQ10_ROUND( 2829 metrics_v0 2830 ->GfxclkLowUtilizationAcc[inst]); 2831 xcp_metrics->gfx_below_host_limit_total_acc 2832 [idx] = SMUQ10_ROUND( 2833 metrics_v0->GfxclkBelowHostLimitTotalAcc 2834 [inst]); 2835 } 2836 idx++; 2837 } 2838 } 2839 2840 return sizeof(*xcp_metrics); 2841 } 2842 2843 static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table) 2844 { 2845 struct smu_table_context *smu_table = &smu->smu_table; 2846 struct smu_table *tables = smu_table->tables; 2847 struct smu_v13_0_6_gpu_metrics *gpu_metrics; 2848 int version = smu_v13_0_6_get_metrics_version(smu); 2849 MetricsTableV0_t *metrics_v0 __free(kfree) = NULL; 2850 struct amdgpu_device *adev = smu->adev; 2851 int ret = 0, xcc_id, inst, i, j; 2852 MetricsTableV1_t *metrics_v1; 2853 MetricsTableV2_t *metrics_v2; 2854 u16 link_width_level; 2855 u8 num_jpeg_rings; 2856 bool per_inst; 2857 2858 metrics_v0 = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL); 2859 ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false); 2860 if (ret) 2861 return ret; 2862 2863 metrics_v2 = (MetricsTableV2_t *)metrics_v0; 2864 gpu_metrics = (struct smu_v13_0_6_gpu_metrics 2865 *)(tables[SMU_TABLE_SMU_METRICS].cache.buffer); 2866 2867 if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) && 2868 smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) { 2869 smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0, 2870 gpu_metrics); 2871 goto fill; 2872 } 2873 2874 metrics_v1 = (MetricsTableV1_t *)metrics_v0; 2875 metrics_v2 = (MetricsTableV2_t *)metrics_v0; 2876 2877 gpu_metrics->temperature_hotspot = 2878 
SMUQ10_ROUND(GET_METRIC_FIELD(MaxSocketTemperature, version)); 2879 /* Individual HBM stack temperature is not reported */ 2880 gpu_metrics->temperature_mem = 2881 SMUQ10_ROUND(GET_METRIC_FIELD(MaxHbmTemperature, version)); 2882 /* Reports max temperature of all voltage rails */ 2883 gpu_metrics->temperature_vrsoc = 2884 SMUQ10_ROUND(GET_METRIC_FIELD(MaxVrTemperature, version)); 2885 2886 gpu_metrics->average_gfx_activity = 2887 SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusy, version)); 2888 gpu_metrics->average_umc_activity = 2889 SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilization, version)); 2890 2891 gpu_metrics->mem_max_bandwidth = 2892 SMUQ10_ROUND(GET_METRIC_FIELD(MaxDramBandwidth, version)); 2893 2894 gpu_metrics->curr_socket_power = 2895 SMUQ10_ROUND(GET_METRIC_FIELD(SocketPower, version)); 2896 /* Energy counter reported in 15.259uJ (2^-16) units */ 2897 gpu_metrics->energy_accumulator = GET_METRIC_FIELD(SocketEnergyAcc, version); 2898 2899 for (i = 0; i < MAX_GFX_CLKS; i++) { 2900 xcc_id = GET_INST(GC, i); 2901 if (xcc_id >= 0) 2902 gpu_metrics->current_gfxclk[i] = 2903 SMUQ10_ROUND(GET_METRIC_FIELD(GfxclkFrequency, version)[xcc_id]); 2904 2905 if (i < MAX_CLKS) { 2906 gpu_metrics->current_socclk[i] = 2907 SMUQ10_ROUND(GET_METRIC_FIELD(SocclkFrequency, version)[i]); 2908 inst = GET_INST(VCN, i); 2909 if (inst >= 0) { 2910 gpu_metrics->current_vclk0[i] = 2911 SMUQ10_ROUND(GET_METRIC_FIELD(VclkFrequency, 2912 version)[inst]); 2913 gpu_metrics->current_dclk0[i] = 2914 SMUQ10_ROUND(GET_METRIC_FIELD(DclkFrequency, 2915 version)[inst]); 2916 } 2917 } 2918 } 2919 2920 gpu_metrics->current_uclk = SMUQ10_ROUND(GET_METRIC_FIELD(UclkFrequency, version)); 2921 2922 /* Total accumulated cycle counter */ 2923 gpu_metrics->accumulation_counter = GET_METRIC_FIELD(AccumulationCounter, version); 2924 2925 /* Accumulated throttler residencies */ 2926 gpu_metrics->prochot_residency_acc = GET_METRIC_FIELD(ProchotResidencyAcc, version); 2927 gpu_metrics->ppt_residency_acc = GET_METRIC_FIELD(PptResidencyAcc, version); 2928 gpu_metrics->socket_thm_residency_acc = GET_METRIC_FIELD(SocketThmResidencyAcc, version); 2929 gpu_metrics->vr_thm_residency_acc = GET_METRIC_FIELD(VrThmResidencyAcc, version); 2930 gpu_metrics->hbm_thm_residency_acc = 2931 GET_METRIC_FIELD(HbmThmResidencyAcc, version); 2932 2933 /* Clock Lock Status. 
Each bit corresponds to each GFXCLK instance */ 2934 gpu_metrics->gfxclk_lock_status = GET_METRIC_FIELD(GfxLockXCDMak, 2935 version) >> GET_INST(GC, 0); 2936 2937 if (!(adev->flags & AMD_IS_APU)) { 2938 /*Check smu version, PCIE link speed and width will be reported from pmfw metric 2939 * table for both pf & one vf for smu version 85.99.0 or higher else report only 2940 * for pf from registers 2941 */ 2942 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(PCIE_METRICS))) { 2943 gpu_metrics->pcie_link_width = GET_GPU_METRIC_FIELD(PCIeLinkWidth, version); 2944 gpu_metrics->pcie_link_speed = 2945 pcie_gen_to_speed(GET_GPU_METRIC_FIELD(PCIeLinkSpeed, version)); 2946 } else if (!amdgpu_sriov_vf(adev)) { 2947 link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu); 2948 if (link_width_level > MAX_LINK_WIDTH) 2949 link_width_level = 0; 2950 2951 gpu_metrics->pcie_link_width = 2952 DECODE_LANE_WIDTH(link_width_level); 2953 gpu_metrics->pcie_link_speed = 2954 smu_v13_0_6_get_current_pcie_link_speed(smu); 2955 } 2956 2957 gpu_metrics->pcie_bandwidth_acc = 2958 SMUQ10_ROUND(GET_GPU_METRIC_FIELD(PcieBandwidthAcc, version)[0]); 2959 gpu_metrics->pcie_bandwidth_inst = 2960 SMUQ10_ROUND(GET_GPU_METRIC_FIELD(PcieBandwidth, version)[0]); 2961 gpu_metrics->pcie_l0_to_recov_count_acc = 2962 GET_GPU_METRIC_FIELD(PCIeL0ToRecoveryCountAcc, version); 2963 gpu_metrics->pcie_replay_count_acc = 2964 GET_GPU_METRIC_FIELD(PCIenReplayAAcc, version); 2965 gpu_metrics->pcie_replay_rover_count_acc = 2966 GET_GPU_METRIC_FIELD(PCIenReplayARolloverCountAcc, version); 2967 gpu_metrics->pcie_nak_sent_count_acc = 2968 GET_GPU_METRIC_FIELD(PCIeNAKSentCountAcc, version); 2969 gpu_metrics->pcie_nak_rcvd_count_acc = 2970 GET_GPU_METRIC_FIELD(PCIeNAKReceivedCountAcc, version); 2971 if (smu_v13_0_6_cap_supported(smu, SMU_CAP(OTHER_END_METRICS))) 2972 gpu_metrics->pcie_lc_perf_other_end_recovery = 2973 GET_GPU_METRIC_FIELD(PCIeOtherEndRecoveryAcc, version); 2974 2975 } 2976 2977 gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); 2978 2979 gpu_metrics->gfx_activity_acc = 2980 SMUQ10_ROUND(GET_METRIC_FIELD(SocketGfxBusyAcc, version)); 2981 gpu_metrics->mem_activity_acc = 2982 SMUQ10_ROUND(GET_METRIC_FIELD(DramBandwidthUtilizationAcc, version)); 2983 2984 for (i = 0; i < NUM_XGMI_LINKS; i++) { 2985 j = amdgpu_xgmi_get_ext_link(adev, i); 2986 if (j < 0 || j >= NUM_XGMI_LINKS) 2987 continue; 2988 gpu_metrics->xgmi_read_data_acc[j] = SMUQ10_ROUND( 2989 GET_METRIC_FIELD(XgmiReadDataSizeAcc, version)[i]); 2990 gpu_metrics->xgmi_write_data_acc[j] = SMUQ10_ROUND( 2991 GET_METRIC_FIELD(XgmiWriteDataSizeAcc, version)[i]); 2992 ret = amdgpu_get_xgmi_link_status(adev, i); 2993 if (ret >= 0) 2994 gpu_metrics->xgmi_link_status[j] = ret; 2995 } 2996 2997 per_inst = smu_v13_0_6_cap_supported(smu, SMU_CAP(PER_INST_METRICS)); 2998 2999 num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3; 3000 for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { 3001 inst = GET_INST(JPEG, i); 3002 for (j = 0; j < num_jpeg_rings; ++j) 3003 gpu_metrics->jpeg_busy[(i * num_jpeg_rings) + j] = 3004 SMUQ10_ROUND(GET_METRIC_FIELD( 3005 JpegBusy, 3006 version)[(inst * num_jpeg_rings) + j]); 3007 } 3008 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { 3009 inst = GET_INST(VCN, i); 3010 gpu_metrics->vcn_busy[i] = 3011 SMUQ10_ROUND(GET_METRIC_FIELD(VcnBusy, version)[inst]); 3012 } 3013 3014 if (per_inst) { 3015 for (i = 0; i < NUM_XCC(adev->gfx.xcc_mask); ++i) { 3016 inst = GET_INST(GC, i); 3017 gpu_metrics->gfx_busy_inst[i] = SMUQ10_ROUND( 3018 GET_GPU_METRIC_FIELD(GfxBusy, 
version)[inst]); 3019 gpu_metrics->gfx_busy_acc[i] = SMUQ10_ROUND( 3020 GET_GPU_METRIC_FIELD(GfxBusyAcc, 3021 version)[inst]); 3022 if (smu_v13_0_6_cap_supported( 3023 smu, SMU_CAP(HST_LIMIT_METRICS))) { 3024 gpu_metrics->gfx_below_host_limit_ppt_acc 3025 [i] = SMUQ10_ROUND( 3026 metrics_v0->GfxclkBelowHostLimitPptAcc 3027 [inst]); 3028 gpu_metrics->gfx_below_host_limit_thm_acc 3029 [i] = SMUQ10_ROUND( 3030 metrics_v0->GfxclkBelowHostLimitThmAcc 3031 [inst]); 3032 gpu_metrics->gfx_low_utilization_acc 3033 [i] = SMUQ10_ROUND( 3034 metrics_v0 3035 ->GfxclkLowUtilizationAcc[inst]); 3036 gpu_metrics->gfx_below_host_limit_total_acc 3037 [i] = SMUQ10_ROUND( 3038 metrics_v0->GfxclkBelowHostLimitTotalAcc 3039 [inst]); 3040 } 3041 } 3042 } 3043 3044 gpu_metrics->xgmi_link_width = GET_METRIC_FIELD(XgmiWidth, version); 3045 gpu_metrics->xgmi_link_speed = GET_METRIC_FIELD(XgmiBitrate, version); 3046 3047 gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version); 3048 3049 fill: 3050 *table = tables[SMU_TABLE_SMU_METRICS].cache.buffer; 3051 3052 return sizeof(*gpu_metrics); 3053 } 3054 3055 static void smu_v13_0_6_restore_pci_config(struct smu_context *smu) 3056 { 3057 struct amdgpu_device *adev = smu->adev; 3058 int i; 3059 3060 for (i = 0; i < 16; i++) 3061 pci_write_config_dword(adev->pdev, i * 4, 3062 adev->pdev->saved_config_space[i]); 3063 pci_restore_msi_state(adev->pdev); 3064 } 3065 3066 static int smu_v13_0_6_mode2_reset(struct smu_context *smu) 3067 { 3068 int ret = 0, index; 3069 struct amdgpu_device *adev = smu->adev; 3070 int timeout = 10; 3071 3072 index = smu_cmn_to_asic_specific_index(smu, CMN2ASIC_MAPPING_MSG, 3073 SMU_MSG_GfxDeviceDriverReset); 3074 if (index < 0) 3075 return index; 3076 3077 mutex_lock(&smu->message_lock); 3078 3079 ret = smu_cmn_send_msg_without_waiting(smu, (uint16_t)index, 3080 SMU_RESET_MODE_2); 3081 3082 /* Reset takes a bit longer, wait for 200ms. */ 3083 msleep(200); 3084 3085 dev_dbg(smu->adev->dev, "restore config space...\n"); 3086 /* Restore the config space saved during init */ 3087 amdgpu_device_load_pci_state(adev->pdev); 3088 3089 /* Certain platforms have switches which assign virtual BAR values to 3090 * devices. The OS uses the virtual BAR values while the device behind the 3091 * switch is assigned another BAR value. When the device's config space registers 3092 * are queried, the switch returns the virtual BAR values. When a mode-2 reset 3093 * is performed, the switch is unaware of it, and will continue to return 3094 * the same virtual values to the OS. This affects the 3095 * pci_restore_config_space() API as it doesn't write the saved value if 3096 * the current value read from config space is the same as what is 3097 * saved. As a workaround, make sure the config space is restored 3098 * always.
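* smu_v13_0_6_restore_pci_config() above does exactly that: it unconditionally writes back all saved config dwords and restores the MSI state.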
3099 */ 3100 if (!(adev->flags & AMD_IS_APU)) 3101 smu_v13_0_6_restore_pci_config(smu); 3102 3103 dev_dbg(smu->adev->dev, "wait for reset ack\n"); 3104 do { 3105 ret = smu_cmn_wait_for_response(smu); 3106 /* Wait a bit more time for getting ACK */ 3107 if (ret == -ETIME) { 3108 --timeout; 3109 usleep_range(500, 1000); 3110 continue; 3111 } 3112 3113 if (ret) 3114 goto out; 3115 3116 } while (ret == -ETIME && timeout); 3117 3118 out: 3119 mutex_unlock(&smu->message_lock); 3120 3121 if (ret) 3122 dev_err(adev->dev, "failed to send mode2 reset, error code %d", 3123 ret); 3124 3125 return ret; 3126 } 3127 3128 static int smu_v13_0_6_get_thermal_temperature_range(struct smu_context *smu, 3129 struct smu_temperature_range *range) 3130 { 3131 struct amdgpu_device *adev = smu->adev; 3132 u32 aid_temp, xcd_temp, max_temp; 3133 u32 ccd_temp = 0; 3134 int ret; 3135 3136 if (amdgpu_sriov_vf(smu->adev)) 3137 return 0; 3138 3139 if (!range) 3140 return -EINVAL; 3141 3142 /*Check smu version, GetCtfLimit message only supported for smu version 85.69 or higher */ 3143 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(CTF_LIMIT))) 3144 return 0; 3145 3146 /* Get SOC Max operating temperature */ 3147 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, 3148 PPSMC_AID_THM_TYPE, &aid_temp); 3149 if (ret) 3150 goto failed; 3151 if (adev->flags & AMD_IS_APU) { 3152 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, 3153 PPSMC_CCD_THM_TYPE, &ccd_temp); 3154 if (ret) 3155 goto failed; 3156 } 3157 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, 3158 PPSMC_XCD_THM_TYPE, &xcd_temp); 3159 if (ret) 3160 goto failed; 3161 range->hotspot_emergency_max = max3(aid_temp, xcd_temp, ccd_temp) * 3162 SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 3163 3164 /* Get HBM Max operating temperature */ 3165 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit, 3166 PPSMC_HBM_THM_TYPE, &max_temp); 3167 if (ret) 3168 goto failed; 3169 range->mem_emergency_max = 3170 max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 3171 3172 /* Get SOC thermal throttle limit */ 3173 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetThermalLimit, 3174 PPSMC_THROTTLING_LIMIT_TYPE_SOCKET, 3175 &max_temp); 3176 if (ret) 3177 goto failed; 3178 range->hotspot_crit_max = 3179 max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 3180 3181 /* Get HBM thermal throttle limit */ 3182 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetThermalLimit, 3183 PPSMC_THROTTLING_LIMIT_TYPE_HBM, 3184 &max_temp); 3185 if (ret) 3186 goto failed; 3187 3188 range->mem_crit_max = max_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; 3189 3190 failed: 3191 return ret; 3192 } 3193 3194 static int smu_v13_0_6_mode1_reset(struct smu_context *smu) 3195 { 3196 struct amdgpu_device *adev = smu->adev; 3197 u32 fatal_err, param; 3198 int ret = 0; 3199 3200 fatal_err = 0; 3201 param = SMU_RESET_MODE_1; 3202 3203 /* fatal error triggered by ras, PMFW supports the flag */ 3204 if (amdgpu_ras_get_fed_status(adev)) 3205 fatal_err = 1; 3206 3207 param |= (fatal_err << 16); 3208 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, 3209 param, NULL); 3210 3211 if (!ret) 3212 msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); 3213 3214 return ret; 3215 } 3216 3217 static int smu_v13_0_6_link_reset(struct smu_context *smu) 3218 { 3219 int ret = 0; 3220 3221 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset, 3222 SMU_RESET_MODE_4, NULL); 3223 return ret; 3224 } 3225 3226 static bool smu_v13_0_6_is_mode1_reset_supported(struct 
smu_context *smu) 3227 { 3228 return true; 3229 } 3230 3231 static inline bool smu_v13_0_6_is_link_reset_supported(struct smu_context *smu) 3232 { 3233 struct amdgpu_device *adev = smu->adev; 3234 int var = (adev->pdev->device & 0xF); 3235 3236 if (var == 0x0 || var == 0x1 || var == 0x3) 3237 return true; 3238 3239 return false; 3240 } 3241 3242 static int smu_v13_0_6_smu_send_hbm_bad_page_num(struct smu_context *smu, 3243 uint32_t size) 3244 { 3245 int ret = 0; 3246 3247 /* message SMU to update the bad page number on SMUBUS */ 3248 ret = smu_cmn_send_smc_msg_with_param( 3249 smu, SMU_MSG_SetNumBadHbmPagesRetired, size, NULL); 3250 if (ret) 3251 dev_err(smu->adev->dev, 3252 "[%s] failed to message SMU to update HBM bad pages number\n", 3253 __func__); 3254 3255 return ret; 3256 } 3257 3258 static int smu_v13_0_6_send_rma_reason(struct smu_context *smu) 3259 { 3260 int ret; 3261 3262 /* NOTE: the message is only valid on dGPU with pmfw 85.90.0 and above */ 3263 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(RMA_MSG))) 3264 return 0; 3265 3266 ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RmaDueToBadPageThreshold, NULL); 3267 if (ret) 3268 dev_err(smu->adev->dev, 3269 "[%s] failed to send BadPageThreshold event to SMU\n", 3270 __func__); 3271 3272 return ret; 3273 } 3274 3275 /** 3276 * smu_v13_0_6_reset_sdma_is_supported - Check if SDMA reset is supported 3277 * @smu: smu_context pointer 3278 * 3279 * This function checks if the SMU supports resetting the SDMA engine. 3280 * It returns false if the capability is not supported. 3281 */ 3282 static bool smu_v13_0_6_reset_sdma_is_supported(struct smu_context *smu) 3283 { 3284 bool ret = true; 3285 3286 if (!smu_v13_0_6_cap_supported(smu, SMU_CAP(SDMA_RESET))) { 3287 dev_info(smu->adev->dev, 3288 "SDMA reset capability is not supported\n"); 3289 ret = false; 3290 } 3291 3292 return ret; 3293 } 3294 3295 static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) 3296 { 3297 int ret = 0; 3298 3299 if (!smu_v13_0_6_reset_sdma_is_supported(smu)) 3300 return -EOPNOTSUPP; 3301 3302 ret = smu_cmn_send_smc_msg_with_param(smu, 3303 SMU_MSG_ResetSDMA, inst_mask, NULL); 3304 if (ret) 3305 dev_err(smu->adev->dev, 3306 "failed to send ResetSDMA event with mask 0x%x\n", 3307 inst_mask); 3308 3309 return ret; 3310 } 3311 3312 static bool smu_v13_0_6_reset_vcn_is_supported(struct smu_context *smu) 3313 { 3314 return smu_v13_0_6_cap_supported(smu, SMU_CAP(VCN_RESET)); 3315 } 3316 3317 static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask) 3318 { 3319 int ret = 0; 3320 3321 ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ResetVCN, inst_mask, NULL); 3322 if (ret) 3323 dev_err(smu->adev->dev, 3324 "failed to send ResetVCN event with mask 0x%x\n", 3325 inst_mask); 3326 return ret; 3327 } 3328 3329 static int smu_v13_0_6_ras_send_msg(struct smu_context *smu, enum smu_message_type msg, uint32_t param, uint32_t *read_arg) 3330 { 3331 int ret; 3332 3333 switch (msg) { 3334 case SMU_MSG_QueryValidMcaCount: 3335 case SMU_MSG_QueryValidMcaCeCount: 3336 case SMU_MSG_McaBankDumpDW: 3337 case SMU_MSG_McaBankCeDumpDW: 3338 case SMU_MSG_ClearMcaOnRead: 3339 ret = smu_cmn_send_smc_msg_with_param(smu, msg, param, read_arg); 3340 break; 3341 default: 3342 ret = -EPERM; 3343 } 3344 3345 return ret; 3346 } 3347 3348 static int smu_v13_0_6_post_init(struct smu_context *smu) 3349 { 3350 if (smu_v13_0_6_is_link_reset_supported(smu)) 3351 smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__LINK_RESET); 3352 3353 if 
(smu_v13_0_6_reset_sdma_is_supported(smu)) 3354 smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__SDMA_RESET); 3355 3356 if (smu_v13_0_6_reset_vcn_is_supported(smu)) 3357 smu_feature_cap_set(smu, SMU_FEATURE_CAP_ID__VCN_RESET); 3358 3359 return 0; 3360 } 3361 3362 static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) 3363 { 3364 struct smu_context *smu = adev->powerplay.pp_handle; 3365 3366 return smu_v13_0_6_mca_set_debug_mode(smu, enable); 3367 } 3368 3369 static int smu_v13_0_6_get_valid_mca_count(struct smu_context *smu, enum amdgpu_mca_error_type type, uint32_t *count) 3370 { 3371 uint32_t msg; 3372 int ret; 3373 3374 if (!count) 3375 return -EINVAL; 3376 3377 switch (type) { 3378 case AMDGPU_MCA_ERROR_TYPE_UE: 3379 msg = SMU_MSG_QueryValidMcaCount; 3380 break; 3381 case AMDGPU_MCA_ERROR_TYPE_CE: 3382 msg = SMU_MSG_QueryValidMcaCeCount; 3383 break; 3384 default: 3385 return -EINVAL; 3386 } 3387 3388 ret = smu_cmn_send_smc_msg(smu, msg, count); 3389 if (ret) { 3390 *count = 0; 3391 return ret; 3392 } 3393 3394 return 0; 3395 } 3396 3397 static int __smu_v13_0_6_mca_dump_bank(struct smu_context *smu, enum amdgpu_mca_error_type type, 3398 int idx, int offset, uint32_t *val) 3399 { 3400 uint32_t msg, param; 3401 3402 switch (type) { 3403 case AMDGPU_MCA_ERROR_TYPE_UE: 3404 msg = SMU_MSG_McaBankDumpDW; 3405 break; 3406 case AMDGPU_MCA_ERROR_TYPE_CE: 3407 msg = SMU_MSG_McaBankCeDumpDW; 3408 break; 3409 default: 3410 return -EINVAL; 3411 } 3412 3413 param = ((idx & 0xffff) << 16) | (offset & 0xfffc); 3414 3415 return smu_cmn_send_smc_msg_with_param(smu, msg, param, val); 3416 } 3417 3418 static int smu_v13_0_6_mca_dump_bank(struct smu_context *smu, enum amdgpu_mca_error_type type, 3419 int idx, int offset, uint32_t *val, int count) 3420 { 3421 int ret, i; 3422 3423 if (!val) 3424 return -EINVAL; 3425 3426 for (i = 0; i < count; i++) { 3427 ret = __smu_v13_0_6_mca_dump_bank(smu, type, idx, offset + (i << 2), &val[i]); 3428 if (ret) 3429 return ret; 3430 } 3431 3432 return 0; 3433 } 3434 3435 static const struct mca_bank_ipid smu_v13_0_6_mca_ipid_table[AMDGPU_MCA_IP_COUNT] = { 3436 MCA_BANK_IPID(UMC, 0x96, 0x0), 3437 MCA_BANK_IPID(SMU, 0x01, 0x1), 3438 MCA_BANK_IPID(MP5, 0x01, 0x2), 3439 MCA_BANK_IPID(PCS_XGMI, 0x50, 0x0), 3440 }; 3441 3442 static void mca_bank_entry_info_decode(struct mca_bank_entry *entry, struct mca_bank_info *info) 3443 { 3444 u64 ipid = entry->regs[MCA_REG_IDX_IPID]; 3445 u32 instidhi, instid; 3446 3447 /* NOTE: All MCA IPID registers share the same format, 3448 * so the driver can share the MCMP1 register header file. 3449 */ 3450 3451 info->hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); 3452 info->mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); 3453 3454 /* 3455 * Unified DieID Format: SAASS. A:AID, S:Socket.
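* (read MSB to LSB: bit 4 = Socket, bits 3:2 = AID, bits 1:0 = Socket)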
3456 * Unified DieID[4] = InstanceId[0] 3457 * Unified DieID[0:3] = InstanceIdHi[0:3] 3458 */ 3459 instidhi = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi); 3460 instid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo); 3461 info->aid = ((instidhi >> 2) & 0x03); 3462 info->socket_id = ((instid & 0x1) << 2) | (instidhi & 0x03); 3463 } 3464 3465 static int mca_bank_read_reg(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, 3466 int idx, int reg_idx, uint64_t *val) 3467 { 3468 struct smu_context *smu = adev->powerplay.pp_handle; 3469 uint32_t data[2] = {0, 0}; 3470 int ret; 3471 3472 if (!val || reg_idx >= MCA_REG_IDX_COUNT) 3473 return -EINVAL; 3474 3475 ret = smu_v13_0_6_mca_dump_bank(smu, type, idx, reg_idx * 8, data, ARRAY_SIZE(data)); 3476 if (ret) 3477 return ret; 3478 3479 *val = (uint64_t)data[1] << 32 | data[0]; 3480 3481 dev_dbg(adev->dev, "mca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n", 3482 type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val); 3483 3484 return 0; 3485 } 3486 3487 static int mca_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, 3488 int idx, struct mca_bank_entry *entry) 3489 { 3490 int i, ret; 3491 3492 /* NOTE: populate all MCA registers by default */ 3493 for (i = 0; i < ARRAY_SIZE(entry->regs); i++) { 3494 ret = mca_bank_read_reg(adev, type, idx, i, &entry->regs[i]); 3495 if (ret) 3496 return ret; 3497 } 3498 3499 entry->idx = idx; 3500 entry->type = type; 3501 3502 mca_bank_entry_info_decode(entry, &entry->info); 3503 3504 return 0; 3505 } 3506 3507 static int mca_decode_ipid_to_hwip(uint64_t val) 3508 { 3509 const struct mca_bank_ipid *ipid; 3510 uint16_t hwid, mcatype; 3511 int i; 3512 3513 hwid = REG_GET_FIELD(val, MCMP1_IPIDT0, HardwareID); 3514 mcatype = REG_GET_FIELD(val, MCMP1_IPIDT0, McaType); 3515 3516 for (i = 0; i < ARRAY_SIZE(smu_v13_0_6_mca_ipid_table); i++) { 3517 ipid = &smu_v13_0_6_mca_ipid_table[i]; 3518 3519 if (!ipid->hwid) 3520 continue; 3521 3522 if (ipid->hwid == hwid && ipid->mcatype == mcatype) 3523 return i; 3524 } 3525 3526 return AMDGPU_MCA_IP_UNKNOW; 3527 } 3528 3529 static int mca_umc_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3530 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) 3531 { 3532 uint64_t status0; 3533 uint32_t ext_error_code; 3534 uint32_t odecc_err_cnt; 3535 3536 status0 = entry->regs[MCA_REG_IDX_STATUS]; 3537 ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status0); 3538 odecc_err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]); 3539 3540 if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { 3541 *count = 0; 3542 return 0; 3543 } 3544 3545 if (umc_v12_0_is_deferred_error(adev, status0) || 3546 umc_v12_0_is_uncorrectable_error(adev, status0) || 3547 umc_v12_0_is_correctable_error(adev, status0)) 3548 *count = (ext_error_code == 0) ?
odecc_err_cnt : 1; 3549 3550 amdgpu_umc_update_ecc_status(adev, 3551 entry->regs[MCA_REG_IDX_STATUS], 3552 entry->regs[MCA_REG_IDX_IPID], 3553 entry->regs[MCA_REG_IDX_ADDR]); 3554 3555 return 0; 3556 } 3557 3558 static int mca_pcs_xgmi_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3559 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, 3560 uint32_t *count) 3561 { 3562 u32 ext_error_code; 3563 u32 err_cnt; 3564 3565 ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(entry->regs[MCA_REG_IDX_STATUS]); 3566 err_cnt = MCA_REG__MISC0__ERRCNT(entry->regs[MCA_REG_IDX_MISC0]); 3567 3568 if (type == AMDGPU_MCA_ERROR_TYPE_UE && 3569 (ext_error_code == 0 || ext_error_code == 9)) 3570 *count = err_cnt; 3571 else if (type == AMDGPU_MCA_ERROR_TYPE_CE && ext_error_code == 6) 3572 *count = err_cnt; 3573 3574 return 0; 3575 } 3576 3577 static bool mca_smu_check_error_code(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras, 3578 uint32_t errcode) 3579 { 3580 int i; 3581 3582 if (!mca_ras->err_code_count || !mca_ras->err_code_array) 3583 return true; 3584 3585 for (i = 0; i < mca_ras->err_code_count; i++) { 3586 if (errcode == mca_ras->err_code_array[i]) 3587 return true; 3588 } 3589 3590 return false; 3591 } 3592 3593 static int mca_gfx_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3594 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) 3595 { 3596 uint64_t status0, misc0; 3597 3598 status0 = entry->regs[MCA_REG_IDX_STATUS]; 3599 if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { 3600 *count = 0; 3601 return 0; 3602 } 3603 3604 if (type == AMDGPU_MCA_ERROR_TYPE_UE && 3605 REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && 3606 REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { 3607 *count = 1; 3608 return 0; 3609 } else { 3610 misc0 = entry->regs[MCA_REG_IDX_MISC0]; 3611 *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); 3612 } 3613 3614 return 0; 3615 } 3616 3617 static int mca_smu_mca_get_err_count(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3618 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) 3619 { 3620 uint64_t status0, misc0; 3621 3622 status0 = entry->regs[MCA_REG_IDX_STATUS]; 3623 if (!REG_GET_FIELD(status0, MCMP1_STATUST0, Val)) { 3624 *count = 0; 3625 return 0; 3626 } 3627 3628 if (type == AMDGPU_MCA_ERROR_TYPE_UE && 3629 REG_GET_FIELD(status0, MCMP1_STATUST0, UC) == 1 && 3630 REG_GET_FIELD(status0, MCMP1_STATUST0, PCC) == 1) { 3631 if (count) 3632 *count = 1; 3633 return 0; 3634 } 3635 3636 misc0 = entry->regs[MCA_REG_IDX_MISC0]; 3637 *count = REG_GET_FIELD(misc0, MCMP1_MISC0T0, ErrCnt); 3638 3639 return 0; 3640 } 3641 3642 static bool mca_gfx_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3643 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) 3644 { 3645 uint32_t instlo; 3646 3647 instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo); 3648 instlo &= GENMASK(31, 1); 3649 switch (instlo) { 3650 case 0x36430400: /* SMNAID XCD 0 */ 3651 case 0x38430400: /* SMNAID XCD 1 */ 3652 case 0x40430400: /* SMNXCD XCD 0, NOTE: FIXME: fix this error later */ 3653 return true; 3654 default: 3655 return false; 3656 } 3657 3658 return false; 3659 }; 3660 3661 static bool mca_smu_bank_is_valid(const struct mca_ras_info *mca_ras, struct amdgpu_device *adev, 3662 enum amdgpu_mca_error_type type, struct mca_bank_entry *entry) 3663 { 3664 struct smu_context *smu = 
	struct smu_context *smu = adev->powerplay.pp_handle;
	uint32_t errcode, instlo;

	instlo = REG_GET_FIELD(entry->regs[MCA_REG_IDX_IPID], MCMP1_IPIDT0, InstanceIdLo);
	instlo &= GENMASK(31, 1);
	if (instlo != 0x03b30400)
		return false;

	if (smu_v13_0_6_cap_supported(smu, SMU_CAP(ACA_SYND))) {
		errcode = MCA_REG__SYND__ERRORINFORMATION(entry->regs[MCA_REG_IDX_SYND]);
		errcode &= 0xff;
	} else {
		errcode = REG_GET_FIELD(entry->regs[MCA_REG_IDX_STATUS], MCMP1_STATUST0, ErrorCode);
	}

	return mca_smu_check_error_code(adev, mca_ras, errcode);
}

static int sdma_err_codes[] = { CODE_SDMA0, CODE_SDMA1, CODE_SDMA2, CODE_SDMA3 };
static int mmhub_err_codes[] = {
	CODE_DAGB0, CODE_DAGB0 + 1, CODE_DAGB0 + 2, CODE_DAGB0 + 3, CODE_DAGB0 + 4, /* DAGB0-4 */
	CODE_EA0, CODE_EA0 + 1, CODE_EA0 + 2, CODE_EA0 + 3, CODE_EA0 + 4, /* MMEA0-4 */
	CODE_VML2, CODE_VML2_WALKER, CODE_MMCANE,
};

static int vcn_err_codes[] = {
	CODE_VIDD, CODE_VIDV,
};
static int jpeg_err_codes[] = {
	CODE_JPEG0S, CODE_JPEG0D, CODE_JPEG1S, CODE_JPEG1D,
	CODE_JPEG2S, CODE_JPEG2D, CODE_JPEG3S, CODE_JPEG3D,
	CODE_JPEG4S, CODE_JPEG4D, CODE_JPEG5S, CODE_JPEG5D,
	CODE_JPEG6S, CODE_JPEG6D, CODE_JPEG7S, CODE_JPEG7D,
};

/* per-RAS-block dispatch: MCA IP, accepted error codes and count/validity helpers */
static const struct mca_ras_info mca_ras_table[] = {
	{
		.blkid = AMDGPU_RAS_BLOCK__UMC,
		.ip = AMDGPU_MCA_IP_UMC,
		.get_err_count = mca_umc_mca_get_err_count,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__GFX,
		.ip = AMDGPU_MCA_IP_SMU,
		.get_err_count = mca_gfx_mca_get_err_count,
		.bank_is_valid = mca_gfx_smu_bank_is_valid,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__SDMA,
		.ip = AMDGPU_MCA_IP_SMU,
		.err_code_array = sdma_err_codes,
		.err_code_count = ARRAY_SIZE(sdma_err_codes),
		.get_err_count = mca_smu_mca_get_err_count,
		.bank_is_valid = mca_smu_bank_is_valid,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__MMHUB,
		.ip = AMDGPU_MCA_IP_SMU,
		.err_code_array = mmhub_err_codes,
		.err_code_count = ARRAY_SIZE(mmhub_err_codes),
		.get_err_count = mca_smu_mca_get_err_count,
		.bank_is_valid = mca_smu_bank_is_valid,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__XGMI_WAFL,
		.ip = AMDGPU_MCA_IP_PCS_XGMI,
		.get_err_count = mca_pcs_xgmi_mca_get_err_count,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__VCN,
		.ip = AMDGPU_MCA_IP_SMU,
		.err_code_array = vcn_err_codes,
		.err_code_count = ARRAY_SIZE(vcn_err_codes),
		.get_err_count = mca_smu_mca_get_err_count,
		.bank_is_valid = mca_smu_bank_is_valid,
	}, {
		.blkid = AMDGPU_RAS_BLOCK__JPEG,
		.ip = AMDGPU_MCA_IP_SMU,
		.err_code_array = jpeg_err_codes,
		.err_code_count = ARRAY_SIZE(jpeg_err_codes),
		.get_err_count = mca_smu_mca_get_err_count,
		.bank_is_valid = mca_smu_bank_is_valid,
	},
};

static const struct mca_ras_info *mca_get_mca_ras_info(struct amdgpu_device *adev, enum amdgpu_ras_block blkid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(mca_ras_table); i++) {
		if (mca_ras_table[i].blkid == blkid)
			return &mca_ras_table[i];
	}

	return NULL;
}

static int mca_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	int ret;

	switch (type) {
	case AMDGPU_MCA_ERROR_TYPE_UE:
	case AMDGPU_MCA_ERROR_TYPE_CE:
		ret = smu_v13_0_6_get_valid_mca_count(smu, type, count);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

static bool mca_bank_is_valid(struct amdgpu_device *adev, const struct mca_ras_info *mca_ras,
			      enum amdgpu_mca_error_type type, struct mca_bank_entry *entry)
{
	if (mca_decode_ipid_to_hwip(entry->regs[MCA_REG_IDX_IPID]) != mca_ras->ip)
		return false;

	if (mca_ras->bank_is_valid)
		return mca_ras->bank_is_valid(mca_ras, adev, type, entry);

	return true;
}

static int mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
					 struct mca_bank_entry *entry, uint32_t *count)
{
	const struct mca_ras_info *mca_ras;

	if (!entry || !count)
		return -EINVAL;

	mca_ras = mca_get_mca_ras_info(adev, blk);
	if (!mca_ras)
		return -EOPNOTSUPP;

	if (!mca_bank_is_valid(adev, mca_ras, type, entry)) {
		*count = 0;
		return 0;
	}

	return mca_ras->get_err_count(mca_ras, adev, type, entry, count);
}

static int mca_smu_get_mca_entry(struct amdgpu_device *adev,
				 enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry)
{
	return mca_get_mca_entry(adev, type, idx, entry);
}

static int mca_smu_get_valid_mca_count(struct amdgpu_device *adev,
				       enum amdgpu_mca_error_type type, uint32_t *count)
{
	return mca_get_valid_mca_count(adev, type, count);
}

static const struct amdgpu_mca_smu_funcs smu_v13_0_6_mca_smu_funcs = {
	.max_ue_count = 12,
	.max_ce_count = 12,
	.mca_set_debug_mode = mca_smu_set_debug_mode,
	.mca_parse_mca_error_count = mca_smu_parse_mca_error_count,
	.mca_get_mca_entry = mca_smu_get_mca_entry,
	.mca_get_valid_mca_count = mca_smu_get_valid_mca_count,
};

static int aca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
{
	struct smu_context *smu = adev->powerplay.pp_handle;

	return smu_v13_0_6_mca_set_debug_mode(smu, enable);
}

static int smu_v13_0_6_get_valid_aca_count(struct smu_context *smu, enum aca_smu_type type, u32 *count)
{
	uint32_t msg;
	int ret;

	if (!count)
		return -EINVAL;

	switch (type) {
	case ACA_SMU_TYPE_UE:
		msg = SMU_MSG_QueryValidMcaCount;
		break;
	case ACA_SMU_TYPE_CE:
		msg = SMU_MSG_QueryValidMcaCeCount;
		break;
	default:
		return -EINVAL;
	}

	ret = smu_cmn_send_smc_msg(smu, msg, count);
	if (ret) {
		*count = 0;
		return ret;
	}

	return 0;
}

static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev,
				       enum aca_smu_type type, u32 *count)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	int ret;

	switch (type) {
	case ACA_SMU_TYPE_UE:
	case ACA_SMU_TYPE_CE:
		ret = smu_v13_0_6_get_valid_aca_count(smu, type, count);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

static int __smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_smu_type type,
				       int idx, int offset, u32 *val)
{
	uint32_t msg, param;

	switch (type) {
	case ACA_SMU_TYPE_UE:
		msg = SMU_MSG_McaBankDumpDW;
		break;
	case ACA_SMU_TYPE_CE:
		msg = SMU_MSG_McaBankCeDumpDW;
		break;
	default:
		return -EINVAL;
	}

	/* param: bank index in the upper 16 bits, dword-aligned byte offset in the lower 16 bits */
	param = ((idx & 0xffff) << 16) | (offset & 0xfffc);

	return smu_cmn_send_smc_msg_with_param(smu, msg, param, (uint32_t *)val);
}

static int smu_v13_0_6_aca_bank_dump(struct smu_context *smu, enum aca_smu_type type,
				     int idx, int offset, u32 *val, int count)
{
	int ret, i;

	if (!val)
		return -EINVAL;

	for (i = 0; i < count; i++) {
		ret = __smu_v13_0_6_aca_bank_dump(smu, type, idx, offset + (i << 2), &val[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int aca_bank_read_reg(struct amdgpu_device *adev, enum aca_smu_type type,
			     int idx, int reg_idx, u64 *val)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	u32 data[2] = {0, 0};
	int ret;

	if (!val || reg_idx >= ACA_REG_IDX_COUNT)
		return -EINVAL;

	ret = smu_v13_0_6_aca_bank_dump(smu, type, idx, reg_idx * 8, data, ARRAY_SIZE(data));
	if (ret)
		return ret;

	*val = (u64)data[1] << 32 | data[0];

	dev_dbg(adev->dev, "mca read bank reg: type:%s, index: %d, reg_idx: %d, val: 0x%016llx\n",
		type == ACA_SMU_TYPE_UE ? "UE" : "CE", idx, reg_idx, *val);

	return 0;
}

static int aca_smu_get_valid_aca_bank(struct amdgpu_device *adev,
				      enum aca_smu_type type, int idx, struct aca_bank *bank)
{
	int i, ret, count;

	count = min_t(int, 16, ARRAY_SIZE(bank->regs));
	for (i = 0; i < count; i++) {
		ret = aca_bank_read_reg(adev, type, idx, i, &bank->regs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int aca_smu_parse_error_code(struct amdgpu_device *adev, struct aca_bank *bank)
{
	struct smu_context *smu = adev->powerplay.pp_handle;
	int error_code;

	if (smu_v13_0_6_cap_supported(smu, SMU_CAP(ACA_SYND)))
		error_code = ACA_REG__SYND__ERRORINFORMATION(bank->regs[ACA_REG_IDX_SYND]);
	else
		error_code = ACA_REG__STATUS__ERRORCODE(bank->regs[ACA_REG_IDX_STATUS]);

	return error_code & 0xff;
}

static const struct aca_smu_funcs smu_v13_0_6_aca_smu_funcs = {
	.max_ue_bank_count = 12,
	.max_ce_bank_count = 12,
	.set_debug_mode = aca_smu_set_debug_mode,
	.get_valid_aca_count = aca_smu_get_valid_aca_count,
	.get_valid_aca_bank = aca_smu_get_valid_aca_bank,
	.parse_error_code = aca_smu_parse_error_code,
};

static void smu_v13_0_6_set_temp_funcs(struct smu_context *smu)
{
	smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ?
				   &smu_v13_0_12_temp_funcs : NULL;
}

static int smu_v13_0_6_get_ras_smu_drv(struct smu_context *smu, const struct ras_smu_drv **ras_smu_drv)
{
	if (!ras_smu_drv)
		return -EINVAL;

	if (amdgpu_sriov_vf(smu->adev))
		return -EOPNOTSUPP;

	if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_HROM_EN_BIT))
		smu_v13_0_6_cap_set(smu, SMU_CAP(RAS_EEPROM));

	switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
	case IP_VERSION(13, 0, 12):
		*ras_smu_drv = &smu_v13_0_12_ras_smu_drv;
		break;
	default:
		*ras_smu_drv = NULL;
		break;
	}

	return 0;
}

static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
	/* init dpm */
	.get_allowed_feature_mask = smu_v13_0_6_get_allowed_feature_mask,
	/* dpm/clk tables */
	.set_default_dpm_table = smu_v13_0_6_set_default_dpm_table,
	.populate_umd_state_clk = smu_v13_0_6_populate_umd_state_clk,
	.print_clk_levels = smu_v13_0_6_print_clk_levels,
	.force_clk_levels = smu_v13_0_6_force_clk_levels,
	.read_sensor = smu_v13_0_6_read_sensor,
	.set_performance_level = smu_v13_0_6_set_performance_level,
	.get_power_limit = smu_v13_0_6_get_power_limit,
	.is_dpm_running = smu_v13_0_6_is_dpm_running,
	.get_unique_id = smu_v13_0_6_get_unique_id,
	.init_microcode = smu_v13_0_6_init_microcode,
	.fini_microcode = smu_v13_0_fini_microcode,
	.init_smc_tables = smu_v13_0_6_init_smc_tables,
	.fini_smc_tables = smu_v13_0_6_fini_smc_tables,
	.init_power = smu_v13_0_init_power,
	.fini_power = smu_v13_0_fini_power,
	.check_fw_status = smu_v13_0_6_check_fw_status,
	/* pptable related */
	.check_fw_version = smu_v13_0_6_check_fw_version,
	.set_driver_table_location = smu_v13_0_set_driver_table_location,
	.set_tool_table_location = smu_v13_0_set_tool_table_location,
	.notify_memory_pool_location = smu_v13_0_notify_memory_pool_location,
	.system_features_control = smu_v13_0_6_system_features_control,
	.send_smc_msg_with_param = smu_cmn_send_smc_msg_with_param,
	.send_smc_msg = smu_cmn_send_smc_msg,
	.get_enabled_mask = smu_v13_0_6_get_enabled_mask,
	.feature_is_enabled = smu_cmn_feature_is_enabled,
	.set_power_limit = smu_v13_0_6_set_power_limit,
	.get_ppt_limit = smu_v13_0_6_get_ppt_limit,
	.set_xgmi_pstate = smu_v13_0_set_xgmi_pstate,
	.register_irq_handler = smu_v13_0_6_register_irq_handler,
	.enable_thermal_alert = smu_v13_0_enable_thermal_alert,
	.disable_thermal_alert = smu_v13_0_disable_thermal_alert,
	.setup_pptable = smu_v13_0_6_setup_pptable,
	.get_bamaco_support = smu_v13_0_6_get_bamaco_support,
	.get_dpm_ultimate_freq = smu_v13_0_6_get_dpm_ultimate_freq,
	.set_soft_freq_limited_range = smu_v13_0_6_set_soft_freq_limited_range,
	.od_edit_dpm_table = smu_v13_0_6_usr_edit_dpm_table,
	.log_thermal_throttling_event = smu_v13_0_6_log_thermal_throttling_event,
	.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
	.get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
	.get_pm_metrics = smu_v13_0_6_get_pm_metrics,
	.get_xcp_metrics = smu_v13_0_6_get_xcp_metrics,
	.get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range,
	.mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
	.mode1_reset = smu_v13_0_6_mode1_reset,
	.mode2_reset = smu_v13_0_6_mode2_reset,
	.link_reset = smu_v13_0_6_link_reset,
	.wait_for_event = smu_v13_0_wait_for_event,
	.i2c_init = smu_v13_0_6_i2c_control_init,
	.i2c_fini = smu_v13_0_6_i2c_control_fini,
	.send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num,
	.send_rma_reason = smu_v13_0_6_send_rma_reason,
	.reset_sdma = smu_v13_0_6_reset_sdma,
	.dpm_reset_vcn = smu_v13_0_6_reset_vcn,
	.post_init = smu_v13_0_6_post_init,
	.ras_send_msg = smu_v13_0_6_ras_send_msg,
	.get_ras_smu_drv = smu_v13_0_6_get_ras_smu_drv,
};

void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
{
	smu->ppt_funcs = &smu_v13_0_6_ppt_funcs;
	smu->message_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ?
			   smu_v13_0_12_message_map : smu_v13_0_6_message_map;
	smu->clock_map = smu_v13_0_6_clk_map;
	smu->feature_map = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12)) ?
			   smu_v13_0_12_feature_mask_map : smu_v13_0_6_feature_mask_map;
	smu->table_map = smu_v13_0_6_table_map;
	smu->smc_driver_if_version = SMU_IGNORE_IF_VERSION;
	smu->smc_fw_caps |= SMU_FW_CAP_RAS_PRI;
	smu_v13_0_set_smu_mailbox_registers(smu);
	smu_v13_0_6_set_temp_funcs(smu);
	amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs);
	amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs);
}